def update_feed_xml(feed_uid, feed_xml):
    """Update a feed's URL and immediately refetch the feed.

    Parameters:
      feed_uid -- feed primary key (coerced to int)
      feed_xml -- new feed URL to store and fetch

    Returns the number of new items added.
    Raises ParseError if the URL does not parse as a feed,
    FeedAlreadyExists if another feed already uses this URL,
    UnknownError for any other integrity failure.
    """
    feed_uid = int(feed_uid)
    r = requests.get(feed_xml)
    f = feedparser.parse(r.content)
    if not f.feed:
        raise ParseError
    normalize.normalize_feed(f)
    with dbop.db() as db:
        c = db.cursor()
        clear_errors(db, c, feed_uid, f)
        try:
            c.execute("""update fm_feeds set feed_xml=?, feed_html=?
            where feed_uid=?""",
                      [feed_xml, str(f.feed['link']), feed_uid])
        except sqlite3.IntegrityError as e:
            # feed_xml carries a UNIQUE constraint; anything else is unexpected.
            # (fixed Py2-only "except X, e" syntax to the portable "as e" form)
            if 'feed_xml' in str(e):
                db.rollback()
                raise FeedAlreadyExists
            else:
                db.rollback()
                raise UnknownError(str(e))
        filters.load_rules(c)
        num_added = process_parsed_feed(db, c, f, feed_uid)
        db.commit()
        return num_added
def purge_reload(feed_uid):
    """Purge a feed's unrated items and reload the feed from scratch.

    Clears the conditional-GET state (Etag / Last-Modified) so the refetch
    is unconditional, then reprocesses the feed with freshly loaded rules.

    Returns the number of items added back (previously this value was
    computed and silently discarded; returning it is backward-compatible).
    Raises ParseError if the refetched URL does not parse as a feed.
    """
    reload(transform)
    feed_uid = int(feed_uid)
    if feed_uid in feed_guid_cache:
        del feed_guid_cache[feed_uid]
    with dbop.db() as db:
        c = db.cursor()
        # refresh filtering rules
        filters.load_rules(c)
        c.execute("delete from fm_items where item_feed_uid=? and item_rating=0",
                  [feed_uid])
        c.execute("""delete from fm_tags where exists (
        select item_uid from fm_items
        where item_uid=tag_item_uid and item_feed_uid=? and item_rating=0
        )""", [feed_uid])
        # forget validators so the server cannot answer 304 Not Modified
        c.execute("""update fm_feeds set feed_modified=NULL, feed_etag=NULL
        where feed_uid=?""", [feed_uid])
        c.execute("""select feed_xml from fm_feeds where feed_uid=?""",
                  [feed_uid])
        feed_xml = c.fetchone()[0]
        db.commit()
        r = requests.get(feed_xml)
        f = feedparser.parse(r.content)
        if not f.feed:
            raise ParseError
        normalize.normalize_feed(f)
        clear_errors(db, c, feed_uid, f)
        filters.load_rules(c)
        num_added = process_parsed_feed(db, c, f, feed_uid)
        db.commit()
        return num_added
def purge_reload(feed_uid):
    """Purge a feed's unrated items and reload the feed from scratch.

    Resets the conditional-GET validators so the next parse is
    unconditional, then reprocesses the feed with fresh filtering rules.
    """
    reload(transform)
    feed_uid = int(feed_uid)
    # drop any cached GUIDs for this feed
    feed_guid_cache.pop(feed_uid, None)
    from singleton import db
    cursor = db.cursor()
    try:
        # refresh filtering rules
        filters.load_rules(db, cursor)
        cursor.execute(
            "delete from fm_items where item_feed_uid=? and item_rating=0",
            [feed_uid])
        cursor.execute("""delete from fm_tags where exists (
        select item_uid from fm_items
        where item_uid=tag_item_uid and item_feed_uid=? and item_rating=0
        )""", [feed_uid])
        # forget Etag / Last-Modified so the server cannot short-circuit
        cursor.execute("""update fm_feeds set feed_modified=NULL, feed_etag=NULL
        where feed_uid=?""", [feed_uid])
        cursor.execute("select feed_xml from fm_feeds where feed_uid=?",
                       [feed_uid])
        url = cursor.fetchone()[0]
        db.commit()
        parsed = feedparser.parse(url)
        if not parsed.feed:
            raise ParseError
        normalize.normalize_feed(parsed)
        clear_errors(db, cursor, feed_uid, parsed)
        filters.load_rules(db, cursor)
        num_added = process_parsed_feed(db, cursor, parsed, feed_uid)
        db.commit()
    finally:
        cursor.close()
def update_feed_xml(feed_uid, feed_xml):
    """Update a feed URL and fetch the feed.

    Returns the number of new items added.
    Raises ParseError if the URL does not parse as a feed,
    FeedAlreadyExists if another feed already uses this URL,
    UnknownError for any other integrity failure.
    """
    feed_uid = int(feed_uid)
    f = feedparser.parse(feed_xml)
    if not f.feed:
        raise ParseError
    normalize.normalize_feed(f)
    from singleton import db
    c = db.cursor()
    # BUG fix: the original outer "try:" had no matching except/finally,
    # leaving the cursor unclosed; close it in a finally block, matching
    # the convention used by the sibling add_feed implementation.
    try:
        clear_errors(db, c, feed_uid, f)
        try:
            c.execute(
                "update fm_feeds set feed_xml=?, feed_html=? where feed_uid=?",
                [feed_xml, str(f.feed['link']), feed_uid])
        except sqlite.IntegrityError as e:
            # feed_xml carries a UNIQUE constraint; anything else is unexpected
            if 'feed_xml' in str(e):
                db.rollback()
                raise FeedAlreadyExists
            else:
                db.rollback()
                raise UnknownError(str(e))
        filters.load_rules(db, c)
        num_added = process_parsed_feed(db, c, f, feed_uid)
        db.commit()
        return num_added
    finally:
        c.close()
def fetch_feed(feed_uid, feed_xml, feed_etag, feed_modified):
    """Fetch and parse feed_xml, honoring conditional-GET validators.

    Always returns a normalized feed structure: on any fetch/parse error
    an empty feed skeleton is returned instead of raising, so one broken
    feed cannot abort a polling run.
    """
    # coerce falsy validators (empty strings, 0) to None so feedparser
    # omits them from the request
    feed_etag = feed_etag or None
    feed_modified = feed_modified or None
    try:
        parsed = feedparser.parse(feed_xml, etag=feed_etag,
                                  modified=feed_modified)
    except socket.timeout:
        if param.debug:
            param.log.write('EEEEE error fetching feed ' + str(feed_xml) + '\n')
        parsed = {'channel': {}, 'items': []}
    except:
        # deliberate catch-all: best-effort fetching, log and continue
        if param.debug:
            util.print_stack()
        parsed = {'channel': {}, 'items': []}
    normalize.normalize_feed(parsed)
    return parsed
def fetch_feed(feed_uid, feed_xml, feed_etag, feed_modified):
    """Fetch feed_xml over HTTP and parse it.

    Sends the stored Etag as If-None-Match; an empty response body
    (server honored the conditional GET) short-circuits with a 'why'
    diagnostic. Any fetch error likewise yields an empty feed skeleton
    carrying 'why' instead of raising.
    """
    if not feed_etag:
        feed_etag = None
    if not feed_modified:
        feed_modified = None
    try:
        r = requests.get(feed_xml, headers={'If-None-Match': feed_etag})
        # robustness fix: compare truthiness, not == '', so an empty
        # bytes body (b'') is also recognized
        if not r.content:
            return {'channel': {}, 'items': [], 'why': 'no change since Etag'}
        f = feedparser.parse(r.content, etag=r.headers.get('Etag'),
                             modified=feed_modified)
    except (socket.timeout, requests.exceptions.RequestException) as e:
        if param.debug:
            param.log.write('EEEEE error fetching feed %s %s\n' % (feed_xml, e))
        f = {'channel': {}, 'items': [], 'why': repr(e)}
    except:
        # deliberate catch-all: best-effort fetching, log and continue
        if param.debug:
            util.print_stack()
        # BUG fix: sys.exc_info is a function; the original indexed the
        # function object itself (sys.exc_info[1]), which raises TypeError
        f = {'channel': {}, 'items': [], 'why': repr(sys.exc_info()[1])}
    normalize.normalize_feed(f)
    return f
def add_feed(feed_xml):
    """Try to add a feed.

    Returns a tuple (feed_uid, title, num_added, num_filtered).
    Raises ParseError / AutodiscoveryParseError if no usable feed is
    found, FeedAlreadyExists if the URL is already subscribed,
    UnknownError for any other integrity failure.
    """
    with dbop.db() as db:
        c = db.cursor()
        feed_xml = feed_xml.replace('feed://', 'http://')
        # verify the feed
        r = requests.get(feed_xml)
        f = feedparser.parse(r.content)
        if 'url' not in f:
            f['url'] = feed_xml
        # CVS versions of feedparser are not throwing exceptions as they should
        # see:
        # http://sourceforge.net/tracker/index.php?func=detail&aid=1379172&group_id=112328&atid=661937
        if not f.feed or ('link' not in f.feed or 'title' not in f.feed):
            # some feeds have multiple links, one for self and one for PuSH
            if f.feed and 'link' not in f.feed and 'links' in f.feed:
                try:
                    for l in f.feed['links']:
                        if l['rel'] == 'self':
                            f.feed['link'] = l['href']
                except KeyError:
                    pass
            if not f.feed or ('link' not in f.feed or 'title' not in f.feed):
                # try autodiscovery
                try:
                    feed_xml = AutoDiscoveryHandler().feed_url(feed_xml)
                except HTMLParser.HTMLParseError:
                    # in desperate conditions, regexps ride to the rescue
                    try:
                        feed_xml = re_autodiscovery(feed_xml)[0][1]
                    except:
                        util.print_stack()
                        raise AutodiscoveryParseError
                if not feed_xml:
                    raise ParseError
                r = requests.get(feed_xml)
                f = feedparser.parse(r.content)
                if not f.feed:
                    raise ParseError
                # BUG fix: the re-parsed result lacks 'url', which the feed
                # dict below reads -- without this the build raised KeyError
                f['url'] = feed_xml
        # we have a valid feed, normalize it
        normalize.normalize_feed(f)
        feed = {
            'xmlUrl': f['url'],
            'htmlUrl': str(f.feed['link']),
            'etag': r.headers.get('Etag'),
            'title': f.feed['title'].encode('ascii', 'xmlcharrefreplace'),
            'desc': f.feed['description'].encode('ascii', 'xmlcharrefreplace')
        }
        # NOTE(review): dropped a no-op loop that reassigned str values of
        # `feed` back to themselves unchanged
        filters.load_rules(c)
        try:
            c.execute("""insert into fm_feeds (feed_xml, feed_etag, feed_html, feed_title,
            feed_desc) values
            (:xmlUrl, :etag, :htmlUrl, :title, :desc)""", feed)
            feed_uid = c.lastrowid
            num_added, num_filtered = process_parsed_feed(db, c, f, feed_uid)
            db.commit()
            return feed_uid, feed['title'], num_added, num_filtered
        except sqlite3.IntegrityError as e:
            # feed_xml carries a UNIQUE constraint; anything else is unexpected
            if 'feed_xml' in str(e):
                db.rollback()
                raise FeedAlreadyExists
            else:
                db.rollback()
                raise UnknownError(str(e))
def add_feed(feed_xml):
  """Try to add a feed. Returns a tuple (feed_uid, num_added, num_filtered)"""
  # NOTE(review): actually returns (feed_uid, title, num_added, num_filtered);
  # the docstring above is kept verbatim but undercounts the tuple
  from singleton import db
  c = db.cursor()
  # normalize feed:// pseudo-scheme to plain HTTP before fetching
  feed_xml = feed_xml.replace('feed://', 'http://')
  try:
    # verify the feed
    # presumably this feedparser version records the fetched URL in f['url'],
    # read below when building the insert dict -- TODO confirm
    f = feedparser.parse(feed_xml)
    # CVS versions of feedparser are not throwing exceptions as they should
    # see:
    # http://sourceforge.net/tracker/index.php?func=detail&aid=1379172&group_id=112328&atid=661937
    if not f.feed or ('link' not in f.feed or 'title' not in f.feed):
      # some feeds have multiple links, one for self and one for PuSH
      if f.feed and 'link' not in f.feed and 'links' in f.feed:
        try:
          for l in f.feed['links']:
            if l['rel'] == 'self':
              f.feed['link'] = l['href']
        except KeyError:
          pass
      # re-check: the rel=self scan above may have supplied the missing link
      if not f.feed or ('link' not in f.feed or 'title' not in f.feed):
        # try autodiscovery
        try:
          feed_xml = AutoDiscoveryHandler().feed_url(feed_xml)
        except HTMLParser.HTMLParseError:
          # in desperate conditions, regexps ride to the rescue
          try:
            feed_xml = re_autodiscovery(feed_xml)[0][1]
          except:
            util.print_stack()
            raise AutodiscoveryParseError
        if not feed_xml:
          raise ParseError
        f = feedparser.parse(feed_xml)
        if not f.feed:
          raise ParseError
    # we have a valid feed, normalize it
    normalize.normalize_feed(f)
    # values for the parameterized INSERT below; title/desc are forced to
    # ASCII with XML character references for non-ASCII characters
    feed = {
      'xmlUrl': f['url'],
      'htmlUrl': str(f.feed['link']),
      'etag': f.get('etag'),
      'title': f.feed['title'].encode('ascii', 'xmlcharrefreplace'),
      'desc': f.feed['description'].encode('ascii', 'xmlcharrefreplace')
    }
    # NOTE(review): this loop assigns each str value back to itself -- a
    # no-op as written; possibly a vestigial encoding conversion
    for key, value in feed.items():
      if type(value) == str:
        feed[key] = value
    filters.load_rules(db, c)
    try:
      c.execute("""insert into fm_feeds (feed_xml, feed_etag, feed_html, feed_title,
      feed_desc) values
      (:xmlUrl, :etag, :htmlUrl, :title, :desc)""", feed)
      feed_uid = c.lastrowid
      num_added, num_filtered = process_parsed_feed(db, c, f, feed_uid)
      db.commit()
      return feed_uid, feed['title'], num_added, num_filtered
    except sqlite.IntegrityError, e:
      # feed_xml carries a UNIQUE constraint; anything else is unexpected
      if 'feed_xml' in str(e):
        db.rollback()
        raise FeedAlreadyExists
      else:
        db.rollback()
        raise UnknownError(str(e))
  finally:
    c.close()