Example #1
def get_news(lang="en-CA", max_items=4, max_age=30):
    """Pull news items from the Library & Archives feed."""

    import datetime

    import atoma
    import requests

    url = "https://biblio.laurentian.ca/research/news.xml"
    news_link = "https://biblio.laurentian.ca/research/news"
    news_heading = "News"

    if lang.startswith("fr"):
        url = "https://biblio.laurentian.ca/research/fr/news.xml"
        news_link = "https://biblio.laurentian.ca/research/fr/nouvelles"
        news_heading = "Nouvelles"

    r = requests.get(url)
    feed = atoma.parse_rss_bytes(r.content)

    news_items = []
    count = 0
    # Items published before this moment count as stale.
    cutoff = (datetime.datetime.today() -
              datetime.timedelta(days=max_age)).timestamp()
    for item in feed.items:
        # Stop at max_items or at the first stale item (the newest item
        # is always kept, even when it is older than max_age days).
        if count == max_items or (count > 0 and
                                  item.pub_date.timestamp() < cutoff):
            break
        news_items.append({
            "title": item.title,
            "url": item.link,
            "published": item.pub_date.date()
        })
        count += 1

    return news_items
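
A minimal usage sketch for get_news, assuming the feed above is reachable; the argument values are illustrative:

# Hypothetical call: up to three French items from the last two weeks.
for entry in get_news(lang="fr-CA", max_items=3, max_age=14):
    print(entry["published"], entry["title"], entry["url"])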
Example #2
def fetch(self):
    # Use the configured session when available, plain requests otherwise.
    get = self.session.get if self.session is not None else requests.get
    resp = get(self.url, headers=generate_headers(self.url))
    try:
        # Try Atom first; fall back to RSS on any parsing error.
        return parse_atom_bytes(resp.content)
    except Exception:
        return parse_rss_bytes(resp.content)
Example #3
    def retrieve_feed(self, url):
        response = requests.get(url)
        feed = atoma.parse_rss_bytes(response.content)
        items = []
        c = self.conn.cursor()
        for item in feed.items:
            items.append({
                "title": item.title,
                "link": item.link,
                "description": item.description,
                # Plain-text copy; here it simply duplicates the description.
                "description_text": item.description,
                "pubDate": str(item.pub_date)
            })
            # Parameterized INSERT keeps feed content from being
            # interpreted as SQL.
            c.execute(
                """INSERT INTO news (title, link, description,
                         published, feed, liked) values
                         (?, ?, ?, ?, ?, ?)""",
                (item.title, item.link, item.description, item.pub_date,
                 feed.link, False))
        self.conn.commit()

        return {
            "channel": {
                "title": feed.title,
                "link": feed.link,
                "url": feed.link
            },
            "items": items
        }
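
The INSERT above implies a news table shaped roughly as follows; the column types are an assumption, since the snippet does not show the schema:

# Hypothetical schema matching the INSERT above; types are guesses.
conn.execute("""CREATE TABLE IF NOT EXISTS news (
    title       TEXT,
    link        TEXT,
    description TEXT,
    published   TIMESTAMP,
    feed        TEXT,
    liked       BOOLEAN
)""")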
Example #4
def test_feed(client, post: Post, second_post: Post, hidden_post: Post):
    # set the post dates so they are listed in a predictable order
    post.created = datetime(2019, 12, 2, tzinfo=utc)
    second_post.created = datetime(2019, 11, 1, tzinfo=utc)

    post.save()
    second_post.save()

    # get the feed
    url = reverse('blog:feed')
    response = client.get(url)
    feed = atoma.parse_rss_bytes(response.content)

    # check the title and description match those in the config
    assert feed.title == "Chocolate"
    assert feed.description == "Ice Cream"

    # check the link is correct
    assert urlparse(feed.link).path == reverse('blog:list')

    # check we only have two visible items
    assert len(feed.items) == 2

    # first post
    item_one = feed.items[0]
    assert urlparse(item_one.link).path == post.get_absolute_url()
    assert item_one.title == post.title
    assert item_one.pub_date == post.created

    # second post
    item_two = feed.items[1]
    assert urlparse(item_two.link).path == second_post.get_absolute_url()
    assert item_two.title == second_post.title
    assert item_two.pub_date == second_post.created
Example #5
    def serializer(self, content: bytes):
        feed = None
        try:
            feed = atoma.parse_rss_bytes(content)
        except Exception as err:
            # Log the failure and fall through to return None.
            self._logger.error("can't parse RSS: %s", err)

        return feed
Example #6
async def fetch(self):
    async with self.session.get(
            self.url, headers=generate_headers(self.url)) as response:
        content = await response.read()
        try:
            # Try Atom first; fall back to RSS on any parsing error.
            # Catching Exception (not a bare except) lets CancelledError
            # propagate in async code.
            return parse_atom_bytes(content)
        except Exception:
            return parse_rss_bytes(content)
Example #7
def __init__(self, link):
    super().__init__()
    xml = get(link)  # presumably requests.get, imported elsewhere
    feed = atoma.parse_rss_bytes(xml.content)
    self.title = feed.title
    for item in feed.items:
        self.items.append(
            RssArticle(item.title, item.description, item.link,
                       item.pub_date))
    # RSS feeds usually list newest first; reverse to chronological order.
    self.items.reverse()
Example #8
def _get_feed(feed_content, payload):
    if payload['source']['type'] == 'atom':
        feed = atoma.parse_atom_bytes(feed_content)
    elif payload['source']['type'] == 'rss':
        feed = atoma.parse_rss_bytes(feed_content)
    else:
        raise Exception(
            "SourceError", "Unknown feed type '%s'. Choose 'rss' or 'atom'." %
            payload['source']['type'])
    return feed
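
A usage sketch for _get_feed; the payload shape is inferred from the keys the function reads, and feed_content is assumed to hold raw feed bytes:

# Only payload['source']['type'] is consulted: 'rss' or 'atom'.
payload = {'source': {'type': 'rss'}}
feed = _get_feed(feed_content, payload)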
Example #9
 def parse(self, url):
     try:
         content = self.session.get(url).content
     except requests.exceptions.ConnectionError as e:
         logger.exception(e)
         raise SGFeedUpdateFailedException
     try:
         return atoma.parse_rss_bytes(content)
     except atoma.exceptions.FeedXMLError as e:
         logger.error(f"{e}: {content}")
         raise SGFeedUpdateFailedException
Example #10
def feed(request):
    # get feed
    feed = requests.get(
        'https://www.standaard.be/rss/section/1f2838d4-99ea-49f0-9102-138784c7ea7c'
    )
    articles = atoma.parse_rss_bytes(feed.content).items

    # get article path for each article
    for article in articles:
        article.path = article.guid.replace("https://www.standaard.be/cnt/",
                                            "")

    # pass context to template
    return render(request, 'articles/feed.html', {'articles': articles})
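
A hypothetical urls.py wiring this view together with the detail view in Example #12; the route names and patterns are illustrative, not taken from the source:

from django.urls import path

from . import views

# Illustrative routes; the project's real URL names are not shown.
urlpatterns = [
    path('', views.feed, name='feed'),
    path('<str:article_path>/', views.detail, name='detail'),
]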
Example #11
def fetch_and_parse_rssfeed_atom(url_file_stream_or_string, site_cookies_dict=None,
                                 user_agent=None, request_headers=None, timeout=10):
    import atoma

    result = http.download_file(url_file_stream_or_string, site_cookies_dict=site_cookies_dict,
                                user_agent=user_agent, request_headers=request_headers, timeout=timeout)
    # Override atoma's module-level RSS version whitelist before parsing.
    atoma.rss.supported_rss_versions = []
    parsed_feeds = {}

    try:
        atoma_result = atoma.parse_rss_bytes(result['content'])
        parsed_feeds = atoma_result_to_dict(atoma_result)
    except atoma.FeedXMLError as err:
        # Mirror feedparser's "bozo" convention for malformed feeds.
        readable_body = http.clean_html_body(result['content'])
        parsed_feeds["raw_result"] = readable_body
        parsed_feeds["bozo"] = 1
        parsed_feeds["feed"] = {}
        parsed_feeds["items"] = []
        parsed_feeds["bozo_exception"] = err

    parsed_feeds['parser'] = "atoma"
    return parsed_feeds
Example #12
def detail(request, article_path):
    # Fetch the feed again.
    feed = requests.get(
        'https://www.standaard.be/rss/section/1f2838d4-99ea-49f0-9102-138784c7ea7c'
    )
    articles = atoma.parse_rss_bytes(feed.content).items

    # Find the article with the matching path.
    lookup_article = None
    for article in articles:
        article.path = article.guid.replace("https://www.standaard.be/cnt/",
                                            "")
        if article_path == article.path:
            lookup_article = article
            break

    if lookup_article is None:
        # Assumes `from django.http import Http404`.
        raise Http404("No article matches this path")

    # Strip paragraph tags from the description.
    lookup_article.description = lookup_article.description.replace(
        "<P>", "").replace("</P>", "").replace("<p>", "").replace("</p>", "")

    # Pass context to the template.
    return render(request, 'articles/detail.html', {'article': lookup_article})
Example #13
def RssRead():
    # Create the JSON database - db.json
    db = TinyDB('./db.json', sort_keys=True, indent=4, separators=(',', ': '),
                ensure_ascii=False, encoding='utf-8')
    titulo_postagem = Query()

    # The three source feeds, processed identically below.
    feeds = [
        ('Aleteia', 'https://pt.aleteia.org/feed/'),
        ('VaticanNews', 'https://www.vaticannews.va/pt.rss.xml'),
        ('AciDigital', 'https://www.acidigital.com/rss/rss.php'),
    ]

    data_hoje = datetime.date.today().strftime('%d/%m/%Y')

    for feed_name, url in feeds:
        # Read the feed.
        response = requests.get(url)
        feed = atoma.parse_rss_bytes(response.content)

        # Save today's items to the JSON database, skipping titles that
        # are already stored.
        for post in feed.items:
            data_postagem = post.pub_date.strftime('%d/%m/%Y')
            if data_hoje == data_postagem:
                if not db.contains(titulo_postagem.titulo == post.title):
                    db.insert({
                        'source': feed_name,
                        'date': data_postagem,
                        'titulo': post.title,
                        'descrição': post.description,
                        'link': post.link,
                        'categorias': post.categories,
                        'postado': 'não',
                    })

        print('RSS %s read successfully!' % feed_name)
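
A follow-up sketch under the same imports (TinyDB, Query): reading back the items whose postado flag is still 'não', i.e. not yet posted:

# Hypothetical read-back of the database written above.
db = TinyDB('./db.json')
postagem = Query()
nao_postados = db.search(postagem.postado == 'não')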
Example #14
def _extract_news_feed_items(self, proxies):
    # Download the raw feed (optionally via proxies) and return its items.
    content = self.parser.get_content(proxies=proxies)
    news_feed = atoma.parse_rss_bytes(content)
    return news_feed.items
Example #15
import atoma
import requests

response = requests.get('???')  # '???' is a placeholder for a feed URL
feed = atoma.parse_rss_bytes(response.content)
feed.items
Example #16
def get_episodes():
    # FEED_URI is read from the environment and must point at an RSS feed.
    response = requests.get(os.getenv("FEED_URI"))
    feed = atoma.parse_rss_bytes(response.content)
    return feed.items
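
A minimal invocation sketch; the FEED_URI value below is a placeholder, not a URL from the source:

import os

# Placeholder feed URL for illustration only.
os.environ.setdefault("FEED_URI", "https://example.com/podcast.rss")

for episode in get_episodes():
    print(episode.title, episode.pub_date)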
Example #17
import base64
import os
import random
from datetime import datetime
from urllib.request import urlopen, Request
from urllib.parse import urlencode

from atoma import parse_rss_bytes

from utils import escape, html_unescape, u, s

TWITTER_API_VERSION = '1.0'
TWITTER_API_METHOD = 'HMAC-SHA1'
TWITTER_API_END = 'https://api.twitter.com/1.1/statuses/update.json'
TWITTER_CONSUMER_KEY = os.environ.get('TWITTER_CONSUMER_KEY')
TWITTER_CONSUMER_SECRET = os.environ.get('TWITTER_CONSUMER_SECRET')
TWITTER_OAUTH_TOKEN = os.environ.get('TWITTER_OAUTH_TOKEN')
TWITTER_OAUTH_SECRET = os.environ.get('TWITTER_OAUTH_SECRET')
FEED_URL = os.environ.get('FEED_URL')
FEED_DATA = parse_rss_bytes(urlopen(FEED_URL).read())

for post in FEED_DATA.items:

    ITEM_TIMESTAMP = int(post.pub_date.strftime('%Y%m%d%H%M%S'))
    LAST_TIMESTAMP = int(datetime.now().strftime('%Y%m%d%H%M%S')) - 10000
    ITEM_TITLE = u(html_unescape(post.title))
    ITEM_LINK = u(post.guid)
    TWITTER_STATUS = ITEM_TITLE + ' ' + ITEM_LINK

    if ITEM_TIMESTAMP >= LAST_TIMESTAMP:

        SIGNATURE_TIMESTAMP = datetime.now().strftime('%s')
        SIGNATURE_ONCE = base64.b64encode(
            s(''.join([str(random.randint(0, 9)) for i in range(24)])))
        SIGNATURE_BASE_STRING_AUTH = 'oauth_consumer_key=' + escape(
Example #18
import os
import sys
from datetime import datetime
from urllib.request import urlopen

from atoma import parse_rss_bytes

from utils import u, html_unescape, escape, filter_json_index_by_year

json_index_content = {}
twitter_api_version = '1.0'
twitter_api_method = 'HMAC-SHA1'
twitter_api_end = 'https://api.twitter.com/1.1/statuses/update.json'
twitter_consumer_key = os.environ.get('TWITTER_CONSUMER_KEY')
twitter_consumer_secret = os.environ.get('TWITTER_CONSUMER_SECRET')
twitter_oauth_token = os.environ.get('TWITTER_OAUTH_TOKEN')
twitter_oauth_secret = os.environ.get('TWITTER_OAUTH_SECRET')
feed_url = os.environ.get('FEED_URL')
feed_data = parse_rss_bytes(urlopen(feed_url).read())

current_timestamp = int(datetime.now().strftime('%Y%m%d%H%M%S'))
current_hour = int(datetime.now().strftime('%H'))

if current_hour not in [2, 6, 9, 14, 17, 21]:
    print("Script wasn't called at a recommended hour. Aborting.")
    sys.exit(0)

for post in feed_data.items:
    post_timestamp = post.pub_date.strftime('%Y%m%d%H%M%S')
    json_index_content[post_timestamp] = {
        'title': post.title,
        'url': post.guid,
        'date': post.pub_date
    }