Beispiel #1
0
def update_feed_xml(feed_uid, feed_xml):
  """Point an existing feed at a new URL and fetch it.

  The URL is downloaded and parsed before the database is touched; the
  fm_feeds row is only updated when the document parses as a feed.

  Args:
    feed_uid: identifier of the feed to update; coerced with int().
    feed_xml: the new feed URL.

  Returns:
    The result of process_parsed_feed() for the freshly parsed feed
    (described by the original docstring as the number of new items).

  Raises:
    ParseError: the downloaded document did not yield a feed.
    FeedAlreadyExists: the update raised an IntegrityError whose message
      mentions feed_xml.
    UnknownError: the update raised any other IntegrityError.
  """
  feed_uid = int(feed_uid)

  r = requests.get(feed_xml)
  f = feedparser.parse(r.content)
  if not f.feed:
    raise ParseError
  normalize.normalize_feed(f)

  with dbop.db() as db:
    c = db.cursor()
    clear_errors(db, c, feed_uid, f)
    try:
      c.execute("""update fm_feeds set feed_xml=?, feed_html=?
      where feed_uid=?""",
                [feed_xml, str(f.feed['link']), feed_uid])
    except sqlite3.IntegrityError as e:
      # roll back whichever error ends up being reported
      db.rollback()
      if 'feed_xml' in str(e):
        raise FeedAlreadyExists
      raise UnknownError(str(e))
    filters.load_rules(c)
    num_added = process_parsed_feed(db, c, f, feed_uid)
    db.commit()
    return num_added
Beispiel #2
0
def purge_reload(feed_uid):
  """Discard a feed's unrated items and reload the feed from its URL.

  Reloads the transform module, drops the feed's entry from
  feed_guid_cache, deletes the items with item_rating=0 (and their tags),
  resets the feed's modified/etag markers, then downloads, parses and
  processes the feed again.

  Args:
    feed_uid: identifier of the feed to purge; coerced with int().

  Raises:
    ParseError: the downloaded document did not yield a feed.

  NOTE(review): an unknown feed_uid makes c.fetchone() return None, so the
  [0] subscript raises TypeError; confirm callers only pass existing feeds.
  """
  reload(transform)
  feed_uid = int(feed_uid)
  if feed_uid in feed_guid_cache:
    del feed_guid_cache[feed_uid]
  with dbop.db() as db:
    c = db.cursor()
    # refresh filtering rules
    filters.load_rules(c)
    # Tags must go first: the subquery identifies them through the unrated
    # rows of fm_items, so it cannot match once those rows are deleted.
    c.execute("""delete from fm_tags
    where exists (
      select item_uid from fm_items
      where item_uid=tag_item_uid and item_feed_uid=? and item_rating=0
    )""", [feed_uid])
    c.execute("delete from fm_items where item_feed_uid=? and item_rating=0",
              [feed_uid])
    # clear the conditional-fetch markers stored for this feed
    c.execute("""update fm_feeds set feed_modified=NULL, feed_etag=NULL
    where feed_uid=?""", [feed_uid])
    c.execute("""select feed_xml from fm_feeds
    where feed_uid=?""", [feed_uid])
    feed_xml = c.fetchone()[0]
    # persist the purge before going to the network
    db.commit()
    r = requests.get(feed_xml)
    f = feedparser.parse(r.content)
    if not f.feed:
      raise ParseError
    normalize.normalize_feed(f)
    clear_errors(db, c, feed_uid, f)
    filters.load_rules(c)
    process_parsed_feed(db, c, f, feed_uid)
    db.commit()
Beispiel #3
0
def purge_reload(feed_uid):
  """Discard a feed's unrated items and reload the feed from its URL.

  Reloads the transform module, drops the feed's entry from
  feed_guid_cache, deletes the items with item_rating=0 (and their tags),
  resets the feed's modified/etag markers, then parses and processes the
  feed again. The cursor is closed on every exit path.

  Args:
    feed_uid: identifier of the feed to purge; coerced with int().

  Raises:
    ParseError: feedparser did not yield a feed for the stored URL.

  NOTE(review): an unknown feed_uid makes c.fetchone() return None, so the
  [0] subscript raises TypeError; confirm callers only pass existing feeds.
  """
  reload(transform)
  feed_uid = int(feed_uid)
  if feed_uid in feed_guid_cache:
    del feed_guid_cache[feed_uid]
  from singleton import db
  c = db.cursor()
  try:
    # refresh filtering rules
    filters.load_rules(db, c)
    # Tags must go first: the subquery identifies them through the unrated
    # rows of fm_items, so it cannot match once those rows are deleted.
    c.execute("""delete from fm_tags
    where exists (
      select item_uid from fm_items
      where item_uid=tag_item_uid and item_feed_uid=? and item_rating=0
    )""", [feed_uid])
    c.execute("delete from fm_items where item_feed_uid=? and item_rating=0",
              [feed_uid])
    # clear the conditional-fetch markers stored for this feed
    c.execute("""update fm_feeds set feed_modified=NULL, feed_etag=NULL
    where feed_uid=?""", [feed_uid])
    c.execute("select feed_xml from fm_feeds where feed_uid=?", [feed_uid])
    feed_xml = c.fetchone()[0]
    # persist the purge before fetching the feed
    db.commit()
    f = feedparser.parse(feed_xml)
    if not f.feed:
      raise ParseError
    normalize.normalize_feed(f)
    clear_errors(db, c, feed_uid, f)
    filters.load_rules(db, c)
    process_parsed_feed(db, c, f, feed_uid)
    db.commit()
  finally:
    c.close()
Beispiel #4
0
def update_feed_xml(feed_uid, feed_xml):
  """Point an existing feed at a new URL and fetch it.

  The URL is parsed before the database is touched; the fm_feeds row is
  only updated when feedparser yields a feed. The cursor is closed on
  every exit path.

  Args:
    feed_uid: identifier of the feed to update; coerced with int().
    feed_xml: the new feed URL.

  Returns:
    The result of process_parsed_feed() for the freshly parsed feed
    (described by the original docstring as the number of new items).

  Raises:
    ParseError: feedparser did not yield a feed for the URL.
    FeedAlreadyExists: the update raised an IntegrityError whose message
      mentions feed_xml.
    UnknownError: the update raised any other IntegrityError.
  """
  feed_uid = int(feed_uid)

  f = feedparser.parse(feed_xml)
  if not f.feed:
    raise ParseError
  normalize.normalize_feed(f)

  from singleton import db
  c = db.cursor()
  try:
    clear_errors(db, c, feed_uid, f)
    try:
      c.execute("update fm_feeds set feed_xml=?, feed_html=? where feed_uid=?",
                [feed_xml, str(f.feed['link']), feed_uid])
    except sqlite.IntegrityError as e:
      # roll back whichever error ends up being reported
      db.rollback()
      if 'feed_xml' in str(e):
        raise FeedAlreadyExists
      raise UnknownError(str(e))
    filters.load_rules(db, c)
    num_added = process_parsed_feed(db, c, f, feed_uid)
    db.commit()
    return num_added
  finally:
    # the outer try had no closing clause; release the cursor as the other
    # singleton-based functions in this file do
    c.close()
Beispiel #5
0
def fetch_feed(feed_uid, feed_xml, feed_etag, feed_modified):
  """Fetch and parse a feed, falling back to an empty result on failure.

  Args:
    feed_uid: not used by this function.
    feed_xml: the feed URL handed to feedparser.parse().
    feed_etag: etag passed to feedparser; falsy values become None.
    feed_modified: modification marker passed to feedparser; falsy values
      become None.

  Returns:
    The feedparser result, or {'channel': {}, 'items': []} when fetching
    or parsing raised, in either case after normalize.normalize_feed().
  """
  if not feed_etag:
    feed_etag = None
  if not feed_modified:
    feed_modified = None
  try:
    f = feedparser.parse(feed_xml, etag=feed_etag, modified=feed_modified)
  except socket.timeout:
    if param.debug:
      print >> param.log, 'EEEEE error fetching feed', feed_xml
    f = {'channel': {}, 'items': []}
  except Exception:
    # best effort: any ordinary error yields an empty feed, but
    # KeyboardInterrupt/SystemExit are no longer swallowed
    if param.debug:
      util.print_stack()
    f = {'channel': {}, 'items': []}
  normalize.normalize_feed(f)
  return f
Beispiel #6
0
def fetch_feed(feed_uid, feed_xml, feed_etag, feed_modified):
    """Download and parse a feed, falling back to an empty result on failure.

    Args:
        feed_uid: not used by this function.
        feed_xml: the feed URL to download.
        feed_etag: sent as the If-None-Match request header; falsy values
            become None.
        feed_modified: passed to feedparser.parse() as ``modified``; falsy
            values become None.

    Returns:
        The feedparser result after normalize.normalize_feed(). When the
        request or the parse raised, an empty feed dict whose 'why' key
        holds repr() of the exception is normalized and returned instead.
        When the response body is empty, an empty feed dict with
        'why': 'no change since Etag' is returned directly, without
        normalization.
    """
    if not feed_etag:
        feed_etag = None
    if not feed_modified:
        feed_modified = None
    try:
        r = requests.get(feed_xml, headers={'If-None-Match': feed_etag})
        # an empty body is treated as "not modified"
        if not r.content:
            return {'channel': {}, 'items': [], 'why': 'no change since Etag'}
        f = feedparser.parse(r.content,
                             etag=r.headers.get('Etag'),
                             modified=feed_modified)
    except (socket.timeout, requests.exceptions.RequestException) as e:
        if param.debug:
            print >> param.log, 'EEEEE error fetching feed', feed_xml, e
        f = {'channel': {}, 'items': [], 'why': repr(e)}
    except Exception as e:
        # bind the exception directly: the previous sys.exc_info[1]
        # subscripted the function itself and raised TypeError here
        if param.debug:
            util.print_stack()
        f = {'channel': {}, 'items': [], 'why': repr(e)}
    normalize.normalize_feed(f)
    return f
Beispiel #7
0
def add_feed(feed_xml):
  """Try to add a feed.

  Downloads and parses feed_xml (after rewriting feed:// to http://). If
  the result lacks a link or title, falls back to autodiscovery on the
  URL and parses the discovered feed instead. The feed is then normalized,
  inserted into fm_feeds and its items processed.

  Args:
    feed_xml: URL of the feed, or of a page to run autodiscovery on.

  Returns:
    A tuple (feed_uid, feed_title, num_added, num_filtered).

  Raises:
    AutodiscoveryParseError: both the HTML parser and the regexp fallback
      failed during autodiscovery.
    ParseError: autodiscovery found no URL, or the discovered URL did not
      yield a feed.
    FeedAlreadyExists: the insert raised an IntegrityError whose message
      mentions feed_xml.
    UnknownError: the insert raised any other IntegrityError.
  """
  with dbop.db() as db:
    c = db.cursor()
    feed_xml = feed_xml.replace('feed://', 'http://')
    # verify the feed
    r = requests.get(feed_xml)
    f = feedparser.parse(r.content)
    # CVS versions of feedparser are not throwing exceptions as they should
    # see:
    # http://sourceforge.net/tracker/index.php?func=detail&aid=1379172&group_id=112328&atid=661937
    # some feeds have multiple links, one for self and one for PuSH
    if f.feed and 'link' not in f.feed and 'links' in f.feed:
      try:
        for link in f.feed['links']:
          if link['rel'] == 'self':
            f.feed['link'] = link['href']
      except KeyError:
        pass
    if not f.feed or ('link' not in f.feed or 'title' not in f.feed):
      # try autodiscovery
      try:
        feed_xml = AutoDiscoveryHandler().feed_url(feed_xml)
      except HTMLParser.HTMLParseError:
        # in desperate conditions, regexps ride to the rescue
        try:
          feed_xml = re_autodiscovery(feed_xml)[0][1]
        except Exception:
          util.print_stack()
          raise AutodiscoveryParseError
      if not feed_xml:
        raise ParseError
      r = requests.get(feed_xml)
      f = feedparser.parse(r.content)
      if not f.feed:
        raise ParseError
    # Record the URL on whichever parse result is final. Doing this only
    # for the first parse left the autodiscovered result without 'url',
    # which is read below.
    if 'url' not in f:
      f['url'] = feed_xml
    # we have a valid feed, normalize it
    normalize.normalize_feed(f)
    feed = {
      'xmlUrl': f['url'],
      'htmlUrl': str(f.feed['link']),
      'etag': r.headers.get('Etag'),
      'title': f.feed['title'].encode('ascii', 'xmlcharrefreplace'),
      'desc': f.feed['description'].encode('ascii', 'xmlcharrefreplace')
      }
    filters.load_rules(c)
    try:
      c.execute("""insert into fm_feeds
      (feed_xml, feed_etag, feed_html, feed_title, feed_desc) values
      (:xmlUrl, :etag, :htmlUrl, :title, :desc)""", feed)
      feed_uid = c.lastrowid
      num_added, num_filtered = process_parsed_feed(db, c, f, feed_uid)
      db.commit()
      return feed_uid, feed['title'], num_added, num_filtered
    except sqlite3.IntegrityError as e:
      # roll back whichever error ends up being reported
      db.rollback()
      if 'feed_xml' in str(e):
        raise FeedAlreadyExists
      raise UnknownError(str(e))
Beispiel #8
0
def add_feed(feed_xml):
  """Try to add a feed.

  Parses feed_xml (after rewriting feed:// to http://). If the result
  lacks a link or title, falls back to autodiscovery on the URL and parses
  the discovered feed instead. The feed is then normalized, inserted into
  fm_feeds and its items processed. The cursor is closed on every exit
  path.

  Args:
    feed_xml: URL of the feed, or of a page to run autodiscovery on.

  Returns:
    A tuple (feed_uid, feed_title, num_added, num_filtered).

  Raises:
    AutodiscoveryParseError: both the HTML parser and the regexp fallback
      failed during autodiscovery.
    ParseError: autodiscovery found no URL, or the discovered URL did not
      yield a feed.
    FeedAlreadyExists: the insert raised an IntegrityError whose message
      mentions feed_xml.
    UnknownError: the insert raised any other IntegrityError.
  """
  from singleton import db
  c = db.cursor()
  feed_xml = feed_xml.replace('feed://', 'http://')
  try:
    # verify the feed
    f = feedparser.parse(feed_xml)
    # CVS versions of feedparser are not throwing exceptions as they should
    # see:
    # http://sourceforge.net/tracker/index.php?func=detail&aid=1379172&group_id=112328&atid=661937
    # some feeds have multiple links, one for self and one for PuSH
    if f.feed and 'link' not in f.feed and 'links' in f.feed:
      try:
        for link in f.feed['links']:
          if link['rel'] == 'self':
            f.feed['link'] = link['href']
      except KeyError:
        pass
    if not f.feed or ('link' not in f.feed or 'title' not in f.feed):
      # try autodiscovery
      try:
        feed_xml = AutoDiscoveryHandler().feed_url(feed_xml)
      except HTMLParser.HTMLParseError:
        # in desperate conditions, regexps ride to the rescue
        try:
          feed_xml = re_autodiscovery(feed_xml)[0][1]
        except Exception:
          util.print_stack()
          raise AutodiscoveryParseError
      if not feed_xml:
        raise ParseError
      f = feedparser.parse(feed_xml)
      if not f.feed:
        raise ParseError
    # we have a valid feed, normalize it
    normalize.normalize_feed(f)
    feed = {
      'xmlUrl': f['url'],
      'htmlUrl': str(f.feed['link']),
      'etag': f.get('etag'),
      'title': f.feed['title'].encode('ascii', 'xmlcharrefreplace'),
      'desc': f.feed['description'].encode('ascii', 'xmlcharrefreplace')
      }
    filters.load_rules(db, c)
    try:
      c.execute("""insert into fm_feeds
      (feed_xml, feed_etag, feed_html, feed_title, feed_desc) values
      (:xmlUrl, :etag, :htmlUrl, :title, :desc)""", feed)
      feed_uid = c.lastrowid
      num_added, num_filtered = process_parsed_feed(db, c, f, feed_uid)
      db.commit()
      return feed_uid, feed['title'], num_added, num_filtered
    except sqlite.IntegrityError as e:
      # roll back whichever error ends up being reported
      db.rollback()
      if 'feed_xml' in str(e):
        raise FeedAlreadyExists
      raise UnknownError(str(e))
  finally:
    c.close()