Python Newsfeed.search 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: pattern.web

클래스/타입: Newsfeed

메소드/함수: search

hotexamples.com에서의 예제들: 5

Python Newsfeed.search - 5개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한, Python의 pattern.web.Newsfeed.search에 대한 평가가 가장 높은 실제 사용 예제들입니다. 예제를 평가해 주시면 예제의 품질 향상에 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

Newsfeed(6)

search(2)

예제 #1

파일 보기

sys.path.insert(0, os.path.join("..", "..", ".."))

from pattern.web import Newsfeed, plaintext, URL
from pattern.table import date

# This example reads a given RSS or Atom newsfeed channel.
# Some sample newsfeeds to try out:
NATURE = "http://www.nature.com/nature/current_issue/rss/index.html"
SCIENCE = "http://www.sciencemag.org/rss/podcast.xml"
HERALD = "http://www.iht.com/rss/frontpage.xml"
TIME = "http://feeds.feedburner.com/time/topstories"
CNN = "http://rss.cnn.com/rss/edition.rss"

engine = Newsfeed()

for result in engine.search(CNN, cached=True):
    print result.title.upper()
    print plaintext(result.description)  # Remove HTML formatting.
    print result.url
    print result.date
    print

# Newsfeed item URL's lead to the page with the full article.
# Since this page can have any kind of formatting, there is no default way to read it,
# but we can simply download the source HTML and convert it to plain text:
#html = URL(result.url).download()
#print plaintext(html)

# The resulting text can contain a lot of garbage.
# An better way to do this is to use a DOM parser and select the HTML elements we want.
# This is demonstrated in the next example.

예제 #2

파일 보기

파일: 06-feed.py 프로젝트: Dirklectisch/cityment

import os, sys; sys.path.insert(0, os.path.join("..", "..", ".."))

from pattern.web   import Newsfeed, plaintext, URL
from pattern.table import date

# This example reads a given RSS or Atom newsfeed channel.
# Some sample newsfeeds to try out:
NATURE  = "http://www.nature.com/nature/current_issue/rss/index.html"
SCIENCE = "http://www.sciencemag.org/rss/podcast.xml"
HERALD  = "http://www.iht.com/rss/frontpage.xml"
TIME    = "http://feeds.feedburner.com/time/topstories"
CNN     = "http://rss.cnn.com/rss/edition.rss"

engine = Newsfeed()

for result in engine.search(CNN, cached=True):
    print result.title.upper()
    print plaintext(result.description) # Remove HTML formatting.
    print result.url
    print result.date
    print

# Newsfeed item URL's lead to the page with the full article.
# Since this page can have any kind of formatting, there is no default way to read it,
# but we can simply download the source HTML and convert it to plain text:
#html = URL(result.url).download()
#print plaintext(html)

# The resulting text can contain a lot of garbage.
# An better way to do this is to use a DOM parser and select the HTML elements we want.
# This is demonstrated in the next example.

예제 #3

파일 보기

파일: myrss.py 프로젝트: vitojph/myrss

alchemyapi = AlchemyAPI()

# (site name, feed URL) pairs to aggregate.
RSS_LIST = [
  (u"Lifehacker", "http://feeds.gawker.com/lifehacker/vip"),
  (u"The Verge", "http://www.theverge.com/rss/index.xml"),
  (u"Naukas", "http://naukas.com/feed/"),
  (u"Zen Habits", "http://feeds.feedburner.com/zenhabits?format=xml"),
  (u"Yuri", "http://www.lapizarradeyuri.com/feed/"),
  (u"Menéame", "http://www.meneame.net/rss")
]

# One dict per site: {"site": <name>, "feedlist": [<item dict>, ...]}.
items = []

for feed in RSS_LIST:
  feedlist = []
  # Only the first 10 results of each feed are processed.
  for result in reader.search(feed[1])[:10]:
    clean_text = plaintext(result.text)
    # NOTE(review): the raw result.text (not clean_text) is sent for entity
    # extraction -- confirm this is intended.
    response = alchemyapi.entities("text", result.text)

    # Collect (entity text, DBpedia URI or None) pairs. A response without
    # an "entities" key yields an empty list instead of a KeyError, and an
    # entity lacking "disambiguated" or its "dbpedia" field maps to None.
    entities = []
    for entity in response.get("entities", []):
      dbpedia_uri = entity.get("disambiguated", {}).get("dbpedia")
      entities.append((entity["text"], dbpedia_uri))

    feedlist.append(dict(title=result.title, url=result.url, text=clean_text, entities=entities))
  items.append(dict(site=feed[0], feedlist=feedlist))

@app.route('/')

예제 #4

파일 보기

파일: crawl.py 프로젝트: LeonieVS/thesis

    'https://ejbron.wordpress.com/feed/'
}

PATH = pd('news.csv')

# Load the previously saved datasheet and the set of already-seen ids.
# If loading or column access fails, start from an empty datasheet.
# `except Exception` (rather than a bare `except`) keeps the best-effort
# fallback while letting KeyboardInterrupt / SystemExit propagate.
try:
    csv = Datasheet.load(PATH)
    seen = set(csv.columns[-2])  # use url as id
except Exception:
    csv = Datasheet()
    seen = set()

for (label, name), url in sources.items():
    try:
        f = Newsfeed()
        f = f.search(url, cached=False)
    except:
        continue

    for r in f:

        # 1) Download source & parse the HTML tree:
        try:
            src = URL(r.url).download(cached=True)
            dom = DOM(src)
        except Exception as e:
            continue

        # 2) Find article text w/ CSS selectors:
        for selector in (
                "article[class*='node-article']",  # The Hill

예제 #5

-1

파일 보기

파일: feeds.py 프로젝트: pratyush-sngh/python_data_collection

import os, sys; sys.path.append(os.path.join("..", "..", ".."))

from pattern.web   import Newsfeed, plaintext, URL
from pattern.table import date

wsj    = "http://online.wsj.com/xml/rss/3_7014.xml"

engine = Newsfeed()

for result in engine.search(wsj, cached=True):
    print result.title.upper()
    print plaintext(result.description) # Remove HTML formatting.
    print result.url
    print result.date
    print