예제 #1
0
파일: update.py 프로젝트: frostytear/temboz
def update_feed_xml(feed_uid, feed_xml):
  """Update a feed URL and fetch the feed. Returns the number of new items"""
  feed_uid = int(feed_uid)

  r = requests.get(feed_xml)
  f = feedparser.parse(r.content)
  if not f.feed:
    raise ParseError
  normalize.normalize_feed(f)

  with dbop.db() as db:
    c = db.cursor()
    clear_errors(db, c, feed_uid, f)
    try:
      c.execute("""update fm_feeds set feed_xml=?, feed_html=?
      where feed_uid=?""",
                [feed_xml, str(f.feed['link']), feed_uid])
    except sqlite3.IntegrityError, e:
      if 'feed_xml' in str(e):
        db.rollback()
        raise FeedAlreadyExists
      else:
        db.rollback()
        raise UnknownError(str(e))
    filters.load_rules(c)
    num_added = process_parsed_feed(db, c, f, feed_uid)
    db.commit()
    return num_added
예제 #2
0
파일: update.py 프로젝트: frostytear/temboz
def purge_reload(feed_uid):
  """Purge a feed's unrated items and re-fetch the feed from scratch."""
  reload(transform)
  feed_uid = int(feed_uid)
  # forget cached GUIDs so re-fetched items are not skipped as already seen
  if feed_uid in feed_guid_cache:
    del feed_guid_cache[feed_uid]
  with dbop.db() as db:
    cursor = db.cursor()
    # refresh filtering rules
    filters.load_rules(cursor)
    # drop unrated items and their tags, then clear the HTTP cache
    # validators so the next fetch is unconditional
    cursor.execute(
      "delete from fm_items where item_feed_uid=? and item_rating=0",
      [feed_uid])
    cursor.execute("""delete from fm_tags
    where exists (
      select item_uid from fm_items
      where item_uid=tag_item_uid and item_feed_uid=? and item_rating=0
    )""", [feed_uid])
    cursor.execute("""update fm_feeds set feed_modified=NULL, feed_etag=NULL
    where feed_uid=?""", [feed_uid])
    cursor.execute("""select feed_xml from fm_feeds
    where feed_uid=?""", [feed_uid])
    xml_url = cursor.fetchone()[0]
    db.commit()
    # fetch and reprocess the whole feed
    resp = requests.get(xml_url)
    parsed = feedparser.parse(resp.content)
    if not parsed.feed:
      raise ParseError
    normalize.normalize_feed(parsed)
    clear_errors(db, cursor, feed_uid, parsed)
    filters.load_rules(cursor)
    num_added = process_parsed_feed(db, cursor, parsed, feed_uid)
    db.commit()
예제 #3
0
파일: update.py 프로젝트: fazalmajid/temboz
def purge_reload(feed_uid):
  """Purge a feed's unrated items and reload the feed from its URL."""
  reload(transform)
  feed_uid = int(feed_uid)
  # drop the in-process GUID cache so reloaded items are reprocessed
  if feed_uid in feed_guid_cache:
    del feed_guid_cache[feed_uid]
  from singleton import db
  cursor = db.cursor()
  try:
    # refresh filtering rules
    filters.load_rules(db, cursor)
    # remove unrated items, their tags, and the HTTP cache validators
    # so the next fetch is unconditional
    cursor.execute(
      "delete from fm_items where item_feed_uid=? and item_rating=0",
      [feed_uid])
    cursor.execute("""delete from fm_tags
    where exists (
      select item_uid from fm_items
      where item_uid=tag_item_uid and item_feed_uid=? and item_rating=0
    )""", [feed_uid])
    cursor.execute("""update fm_feeds set feed_modified=NULL, feed_etag=NULL
    where feed_uid=?""", [feed_uid])
    cursor.execute("select feed_xml from fm_feeds where feed_uid=?",
                   [feed_uid])
    feed_url = cursor.fetchone()[0]
    db.commit()
    # fetch and reprocess the whole feed
    parsed = feedparser.parse(feed_url)
    if not parsed.feed:
      raise ParseError
    normalize.normalize_feed(parsed)
    clear_errors(db, cursor, feed_uid, parsed)
    filters.load_rules(db, cursor)
    num_added = process_parsed_feed(db, cursor, parsed, feed_uid)
    db.commit()
  finally:
    cursor.close()
예제 #4
0
파일: update.py 프로젝트: fazalmajid/temboz
def update_feed_xml(feed_uid, feed_xml):
  """Update a feed URL and fetch the feed. Returns the number of new items"""
  feed_uid = int(feed_uid)

  f = feedparser.parse(feed_xml)
  if not f.feed:
    raise ParseError
  normalize.normalize_feed(f)

  from singleton import db
  c = db.cursor()
  clear_errors(db, c, feed_uid, f)
  try:
    try:
      c.execute("update fm_feeds set feed_xml=?, feed_html=? where feed_uid=?",
                [feed_xml, str(f.feed['link']), feed_uid])
    except sqlite.IntegrityError, e:
      if 'feed_xml' in str(e):
        db.rollback()
        raise FeedAlreadyExists
      else:
        db.rollback()
        raise UnknownError(str(e))
    filters.load_rules(db, c)
    num_added = process_parsed_feed(db, c, f, feed_uid)
    db.commit()
    return num_added
예제 #5
0
파일: update.py 프로젝트: rtgnx/temboz
def update(where_clause=''):
    """Fetch every active feed concurrently and process the results."""
    with dbop.db() as db:
        cur = db.cursor()
        # refresh filtering rules
        filters.load_rules(cur)
        # at 3AM by default, perform house-cleaning
        if time.localtime()[3] == param.backup_hour:
            cleanup(db, cur)
        # worker threads pull fetch jobs from work_q and report parsed
        # feeds (or None on exit) via process_q
        work_q = Queue.Queue()
        process_q = Queue.Queue()
        workers = []
        for slot in range(param.feed_concurrency):
            worker = FeedWorker(slot + 1, work_q, process_q)
            worker.start()
            workers.append(worker)
        # queue one fetch job per active feed
        cur.execute("""select feed_uid, feed_xml, feed_etag, feed_dupcheck,
    strftime('%s', feed_modified) from fm_feeds where feed_status=0 """ +
                    where_clause)
        for feed_uid, feed_xml, feed_etag, feed_dupcheck, feed_modified in cur:
            if feed_modified:
                feed_modified = time.localtime(float(feed_modified))
            else:
                feed_modified = None
            work_q.put(
                (feed_uid, feed_xml, feed_etag, feed_modified, feed_dupcheck))
        # None is the sentinel telling each worker to stop
        for slot in range(param.feed_concurrency):
            work_q.put(None)
        active = param.feed_concurrency
        while active > 0:
            job = process_q.get()
            if not job:
                # a worker has exited
                active -= 1
            else:
                try:
                    update_feed(db, cur, *job)
                except:
                    util.print_stack()
                db.commit()
            # give reader threads an opportunity to get their work done
            time.sleep(1)
예제 #6
0
파일: update.py 프로젝트: fazalmajid/temboz
def update(where_clause=''):
  """Fetch every active feed using worker threads and process the results."""
  from singleton import db
  cursor = db.cursor()
  # refresh filtering rules
  filters.load_rules(db, cursor)
  # at 3AM by default, perform house-cleaning
  if time.localtime()[3] == param.backup_hour:
    cleanup(db, cursor)
  # worker threads pull fetch jobs from work_q and report parsed feeds
  # (or None on exit) via process_q
  work_q = Queue.Queue()
  process_q = Queue.Queue()
  workers = []
  for slot in range(param.feed_concurrency):
    thread = FeedWorker(slot + 1, work_q, process_q)
    thread.start()
    workers.append(thread)
  # queue one fetch job per active feed
  cursor.execute("""select feed_uid, feed_xml, feed_etag, feed_dupcheck,
  strftime('%s', feed_modified) from fm_feeds where feed_status=0 """
            + where_clause)
  for feed_uid, feed_xml, feed_etag, feed_dupcheck, feed_modified in cursor:
    if feed_modified:
      feed_modified = time.localtime(float(feed_modified))
    else:
      feed_modified = None
    work_q.put((feed_uid, feed_xml, feed_etag, feed_modified, feed_dupcheck))
  # None is the sentinel telling each worker to stop
  for slot in range(param.feed_concurrency):
    work_q.put(None)
  remaining = param.feed_concurrency
  while remaining > 0:
    result = process_q.get()
    if not result:
      # a worker has exited
      remaining -= 1
    else:
      try:
        update_feed(db, cursor, *result)
      except:
        util.print_stack()
      db.commit()
    # give reader threads an opportunity to get their work done
    time.sleep(1)
  cursor.close()
예제 #7
0
파일: update.py 프로젝트: frostytear/temboz
def add_feed(feed_xml):
  """Try to add a feed. Returns a tuple (feed_uid, num_added, num_filtered)"""
  with dbop.db() as db:
    c = db.cursor()
    feed_xml = feed_xml.replace('feed://', 'http://')
    # verify the feed
    r = requests.get(feed_xml)
    f = feedparser.parse(r.content)
    if 'url' not in f:
      f['url'] = feed_xml
    # CVS versions of feedparser are not throwing exceptions as they should
    # see:
    # http://sourceforge.net/tracker/index.php?func=detail&aid=1379172&group_id=112328&atid=661937
    if not f.feed or ('link' not in f.feed or 'title' not in f.feed):
      # some feeds have multiple links, one for self and one for PuSH
      if f.feed and 'link' not in f.feed and 'links' in f.feed:
        try:
          for l in f.feed['links']:
            if l['rel'] == 'self':
              f.feed['link'] = l['href']
        except KeyError:
          pass
    if not f.feed or ('link' not in f.feed or 'title' not in f.feed):
      # try autodiscovery
      try:
        feed_xml = AutoDiscoveryHandler().feed_url(feed_xml)
      except HTMLParser.HTMLParseError:
        # in desperate conditions, regexps ride to the rescue
        try:
          feed_xml = re_autodiscovery(feed_xml)[0][1]
        except:
          util.print_stack()
          raise AutodiscoveryParseError
      if not feed_xml:
        raise ParseError
      r = requests.get(feed_xml)
      f = feedparser.parse(r.content)
      if not f.feed:
        raise ParseError
    # we have a valid feed, normalize it
    normalize.normalize_feed(f)
    feed = {
      'xmlUrl': f['url'],
      'htmlUrl': str(f.feed['link']),
      'etag': r.headers.get('Etag'),
      'title': f.feed['title'].encode('ascii', 'xmlcharrefreplace'),
      'desc': f.feed['description'].encode('ascii', 'xmlcharrefreplace')
      }
    for key, value in feed.items():
      if type(value) == str:
        feed[key] = value
    filters.load_rules(c)
    try:
      c.execute("""insert into fm_feeds
      (feed_xml, feed_etag, feed_html, feed_title, feed_desc) values
      (:xmlUrl, :etag, :htmlUrl, :title, :desc)""", feed)
      feed_uid = c.lastrowid
      num_added, num_filtered = process_parsed_feed(db, c, f, feed_uid)
      db.commit()
      return feed_uid, feed['title'], num_added, num_filtered
    except sqlite3.IntegrityError, e:
      if 'feed_xml' in str(e):
        db.rollback()
        raise FeedAlreadyExists
      else:
        db.rollback()
        raise UnknownError(str(e))
예제 #8
0
파일: update.py 프로젝트: frostytear/temboz
def process_parsed_feed(db, c, f, feed_uid, feed_dupcheck=None, exempt=None):
  """Insert the entries from a feedparser parsed feed f in the database using
the cursor c for feed feed_uid.
feed_dupcheck and exempt override the per-feed flags of the same name when
not None; when None they are read from the feed's fm_feeds row.
Returns a tuple (number of items added unread, number of filtered items)"""
  num_added = 0
  num_filtered = 0
  filters.load_rules(c)
  # check if duplicate title checking is in effect
  if feed_dupcheck is None:
    c.execute("select feed_dupcheck from fm_feeds where feed_uid=?",
              [feed_uid])
    feed_dupcheck = bool(c.fetchone()[0])
  # check if the feed is exempt from filtering
  if exempt is None:
    c.execute("select feed_exempt from fm_feeds where feed_uid=?", [feed_uid])
    exempt = bool(c.fetchone()[0])
  # the Radio convention is reverse chronological order
  f['items'].reverse()
  for item in f['items']:
    try:
      normalize.normalize(item, f)
    except:
      # a malformed item should not abort processing of the whole feed
      util.print_stack()
      continue
    # evaluate the FilteringRules
    skip, rule = filters.evaluate_rules(item, f, feed_uid, exempt)
    filtered_by = None
    if skip:
      # -2 is the item_rating value stored for filtered-out items
      skip = -2
      if type(rule.uid) == int:
        filtered_by = rule.uid
      else:
        # XXX clunky convention for feed_rule, but that should disappear
        # XXX eventually
        filtered_by = 0
    title   = item['title']
    link    = item['link']
    guid    = item['id']
    author = item['author']
    created = item['created']
    modified = item['modified']
    if not modified:
      modified = None
    content = item['content']
    # check if the item already exists, using the GUID as key
    # but cache all seen GUIDs in a dictionary first, since most articles are
    # existing ones and we can save a database query this way
    if feed_uid in feed_guid_cache and guid in feed_guid_cache[feed_uid]:
      # existing entry and we've seen it before in this process instance
      # update the time stamp to prevent premature garbage-collection
      # in prune_feed_guid_cache
      feed_guid_cache.setdefault(feed_uid, dict())[guid] = time.time()
      continue
    else:
      feed_guid_cache.setdefault(feed_uid, dict())[guid] = time.time()
    # not seen yet, it may or may not be a duplicate, we have to find out the
    # hard way
    c.execute("""select item_uid, item_link,
    item_loaded, item_created, item_modified,
    item_md5hex, item_title, item_content, item_creator
    from fm_items where item_feed_uid=? and item_guid=?""",
              [feed_uid, guid])
    l = c.fetchall()
    # unknown GUID, but title/link duplicate checking may be in effect
    if not l:
      if feed_dupcheck:
        c.execute("""select count(*) from fm_items
        where item_feed_uid=? and (item_title=? or item_link=?)""",
                  [feed_uid, title, link])
        # a truthy l below means "treat as duplicate, do not insert"
        l = bool(c.fetchone()[0])
        if l:
          print >> param.activity, 'DUPLICATE TITLE', title
      # XXX Runt items (see normalize.py) are almost always spurious, we just
      # XXX skip them, although we may revisit this decision in the future
      if not l and item.get('RUNT', False):
        print >> param.activity, 'RUNT ITEM', item
        l = True
    # GUID already exists, this is a change
    else:
      assert len(l) == 1
      (item_uid, item_link, item_loaded, item_created, item_modified,
       item_md5hex, item_title, item_content, item_creator) = l[0]
      # if this is a feed without timestamps, use our timestamp to determine
      # the oldest item in the feed XML file
      if 'oldest' in f and f['oldest'] == '1970-01-01 00:00:00':
        if 'oldest_ts' not in f:
          f['oldest_ts'] = item_created
        else:
          f['oldest_ts'] = min(f['oldest_ts'], item_created)
      # XXX update item here
      # XXX update tags if required
    # GUID doesn't exist yet, insert it
    if not l:
      # finally, dereference the URL to get rid of annoying tracking servers
      # like feedburner, but only do this once to avoid wasting bandwidth
      link = normalize.dereference(link)
      try:
        c.execute("""insert into fm_items (item_feed_uid, item_guid,
        item_created,   item_modified, item_link, item_md5hex,
        item_title, item_content, item_creator, item_rating, item_rule_uid)
        values
        (?, ?, julianday(?), julianday(?), ?, ?, ?, ?, ?, ?, ?)""",
                  [feed_uid, guid, created, modified, link,
                   hashlib.md5(content).hexdigest(),
                   title, content, author, skip, filtered_by])
        # if we have tags, insert them
        # note: feedparser.py handles 'category' as a special case, so we
        # need to work around that to get to the data
        if item['item_tags']:
          c.execute("""select item_uid
          from fm_items where item_feed_uid=? and item_guid=?""",
                    [feed_uid, guid])
          item_uid = c.fetchone()[0]
          for tag in item['item_tags']:
            c.execute("""insert or ignore into fm_tags (tag_name, tag_item_uid)
            values (?, ?)""", [tag, item_uid])
        if skip:
          num_filtered += 1
          print >> param.activity, 'SKIP', title, rule
        else:
          num_added += 1
          print >> param.activity, ' ' * 4, title
      except:
        # log and move on to the next item rather than losing the batch
        util.print_stack(['c', 'f'])
        continue
  # update timestamp of the oldest item still in the feed file
  if 'oldest' in f and f['oldest'] != '9999-99-99 99:99:99':
    if f['oldest'] == '1970-01-01 00:00:00' and 'oldest_ts' in f:
      c.execute("update fm_feeds set feed_oldest=? where feed_uid=?",
                [f['oldest_ts'], feed_uid])
    else:
      c.execute("""update fm_feeds set feed_oldest=julianday(?)
      where feed_uid=?""", [f['oldest'], feed_uid])
  
  return (num_added, num_filtered)
예제 #9
0
파일: server.py 프로젝트: rtgnx/temboz
def view_common(do_items=True):
    """Shared logic for the item-listing views.

    Reads the request's query-string parameters, builds the SQL where
    clause and sort order, and returns a dict of template variables
    (including the formatted item list when do_items is true).
    """
    # Query-string parameters for this page
    #   show
    #   feed_uid
    #   search
    #   where_clause
    #   min, max (item UID)
    #
    # What items to use
    #   unread:   unread articles (default)
    #   up:       articles already flagged interesting
    #   down:     articles already flagged uninteresting
    #   filtered: filtered out articles
    #   mylos:    read-only view, e.g. http://www.majid.info/mylos/temboz.html
    with dbop.db() as c:
        filters.load_rules(c)
        show = flask.request.args.get('show', 'unread')
        # map the 'show' name to the ratings table (index 1 = unread default)
        i = update.ratings_dict.get(show, 1)
        show = update.ratings[i][0]
        item_desc = update.ratings[i][1]
        # items updated after the provided julianday
        updated = flask.request.args.get('updated', '')
        where = update.ratings[i][3]
        params = []
        if updated:
            try:
                updated = float(updated)
                params.append(updated)
                # we want all changes, not just unread ones, so we can mark
                # read articles as such in IndexedDB
                where = 'fm_items.updated > ?'
            except:
                print >> param.log, 'invalid updated=' + repr(updated)
        sort = flask.request.args.get('sort', 'seen')
        i = update.sorts_dict.get(sort, 1)
        sort = update.sorts[i][0]
        sort_desc = update.sorts[i][1]
        order_by = update.sorts[i][3]
        # optimizations for mobile devices
        mobile = bool(flask.request.args.get('mobile', False))
        # SQL options
        # filter by filter rule ID
        if show == 'filtered':
            try:
                params.append(int(flask.request.args['rule_uid']))
                where += ' and item_rule_uid=?'
            except:
                pass
        # filter by uid range
        try:
            params.append(int(flask.request.args['min']))
            where += ' and item_uid >= ?'
        except:
            pass
        try:
            params.append(int(flask.request.args['max']))
            where += ' and item_uid <= ?'
        except:
            pass
        # Optionally restrict view to a single feed
        feed_uid = None
        try:
            feed_uid = int(flask.request.args['feed_uid'])
            params.append(feed_uid)
            where += ' and item_feed_uid=?'
        except:
            pass
        # search functionality using fts5 if available
        search = flask.request.args.get('search')
        search_in = flask.request.args.get('search_in', 'title')
        #print >> param.log, 'search =', repr(search)
        if search:
            #print >> param.log, 'dbop.fts_enabled =', dbop.fts_enabled
            if dbop.fts_enabled:
                fterm = fts5.fts5_term(search)
                #print >> param.log, 'FTERM =', repr(fterm)
                where += """ and item_uid in (
          select rowid from search where %s '%s'
        )""" % ('item_title match' if search_in == 'title' else 'search=',
                fterm)
            else:
                # no FTS index available: fall back to a LIKE substring match
                search = search.lower()
                search_where = 'item_title' if search_in == 'title' else 'item_content'
                where += ' and lower(%s) like ?' % search_where
                if type(search) == unicode:
                    # XXX vulnerable to SQL injection attack
                    params.append('%%%s%%' %
                                  search.encode('ascii', 'xmlcharrefreplace'))
                else:
                    params.append('%%%s%%' % search)
                # Support for arbitrary where clauses in the view script. Not directly
                # accessible from the UI
                # NOTE(review): as indented, where_clause only applies when FTS
                # is disabled and a search term is present -- presumably it was
                # meant to apply unconditionally; confirm against history
                extra_where = flask.request.args.get('where_clause')
                if extra_where:
                    # XXX vulnerable to SQL injection attack
                    where += ' and %s' % extra_where
        # Preliminary support for offsets to read more than overload_threshold
        # articles, not fully implemented yet
        try:
            offset = int(flask.request.args['offset'])
        except:
            offset = 0
        ratings_list = ''.join('<li><a href="%s">%s</a></li>' %
                               (change_param(show=rating_name), rating_desc)
                               for (rating_name, rating_desc, discard,
                                    discard) in update.ratings)
        sort_list = ''.join('<li><a href="%s">%s</a></li>' %
                            (change_param(sort=sort_name), sort_desc)
                            for (sort_name, sort_desc, discard,
                                 discard) in update.sorts)
        items = []
        if do_items:
            # fetch and format items
            #print >> param.log, 'where =', where, 'params =', params
            tag_dict, rows = dbop.view_sql(c, where, order_by, params,
                                           param.overload_threshold)
            for row in rows:
                (uid, creator, title, link, content, loaded, created, rated,
                 delta_created, rating, filtered_by, feed_uid, feed_title,
                 feed_html, feed_xml, feed_snr, updated_ts) = row
                # redirect = '/redirect/%d' % uid
                redirect = link
                since_when = since(delta_created)
                creator = creator.replace('"', '\'')
                # rating -2 means the item was suppressed by a filtering rule;
                # highlight what the rule matched
                if rating == -2:
                    if filtered_by:
                        rule = filters.Rule.registry.get(filtered_by)
                        if rule:
                            title = rule.highlight_title(title)
                            content = rule.highlight_content(content)
                        elif filtered_by == 0:
                            content = '%s<br><p>Filtered by feed-specific Python rule</p>' \
                                      % content
                if uid in tag_dict or (creator and (creator != 'Unknown')):
                    # XXX should probably escape the Unicode here
                    tag_info = ' '.join('<span class="item tag">%s</span>' % t
                                        for t in sorted(tag_dict.get(uid, [])))
                    if creator and creator != 'Unknown':
                        tag_info = '%s<span class="author tag">%s</span>' \
                                   % (tag_info, creator)
                    tag_info = '<div class="tag_info" id="tags_%s">' % uid \
                               + tag_info + '</div>'
                    tag_call = '<a href="javascript:toggle_tags(%s);">tags</a>' % uid
                else:
                    tag_info = ''
                    tag_call = '(no tags)'
                items.append({
                    'uid': uid,
                    'since_when': since_when,
                    'creator': creator,
                    'loaded': loaded,
                    'feed_uid': feed_uid,
                    'title': title,
                    'feed_html': feed_html,
                    'content': content,
                    'tag_info': tag_info,
                    'tag_call': tag_call,
                    'redirect': redirect,
                    'feed_title': feed_title,
                    'feed_snr': feed_snr,
                    'updated_ts': updated_ts,
                    'rating': rating,
                })
    return {
        'show': show,
        'item_desc': item_desc,
        'feed_uid': feed_uid,
        'ratings_list': ratings_list,
        'sort_desc': sort_desc,
        'sort_list': sort_list,
        'items': items,
        'overload_threshold': param.overload_threshold
    }
예제 #10
0
파일: update.py 프로젝트: fazalmajid/temboz
def add_feed(feed_xml):
  """Try to add a feed. Returns a tuple (feed_uid, num_added, num_filtered)"""
  from singleton import db
  c = db.cursor()
  feed_xml = feed_xml.replace('feed://', 'http://')
  try:
    # verify the feed
    f = feedparser.parse(feed_xml)
    # CVS versions of feedparser are not throwing exceptions as they should
    # see:
    # http://sourceforge.net/tracker/index.php?func=detail&aid=1379172&group_id=112328&atid=661937
    if not f.feed or ('link' not in f.feed or 'title' not in f.feed):
      # some feeds have multiple links, one for self and one for PuSH
      if f.feed and 'link' not in f.feed and 'links' in f.feed:
        try:
          for l in f.feed['links']:
            if l['rel'] == 'self':
              f.feed['link'] = l['href']
        except KeyError:
          pass
    if not f.feed or ('link' not in f.feed or 'title' not in f.feed):
      # try autodiscovery
      try:
        feed_xml = AutoDiscoveryHandler().feed_url(feed_xml)
      except HTMLParser.HTMLParseError:
        # in desperate conditions, regexps ride to the rescue
        try:
          feed_xml = re_autodiscovery(feed_xml)[0][1]
        except:
          util.print_stack()
          raise AutodiscoveryParseError
      if not feed_xml:
        raise ParseError
      f = feedparser.parse(feed_xml)
      if not f.feed:
        raise ParseError
    # we have a valid feed, normalize it
    normalize.normalize_feed(f)
    feed = {
      'xmlUrl': f['url'],
      'htmlUrl': str(f.feed['link']),
      'etag': f.get('etag'),
      'title': f.feed['title'].encode('ascii', 'xmlcharrefreplace'),
      'desc': f.feed['description'].encode('ascii', 'xmlcharrefreplace')
      }
    for key, value in feed.items():
      if type(value) == str:
        feed[key] = value
    filters.load_rules(db, c)
    try:
      c.execute("""insert into fm_feeds
      (feed_xml, feed_etag, feed_html, feed_title, feed_desc) values
      (:xmlUrl, :etag, :htmlUrl, :title, :desc)""", feed)
      feed_uid = c.lastrowid
      num_added, num_filtered = process_parsed_feed(db, c, f, feed_uid)
      db.commit()
      return feed_uid, feed['title'], num_added, num_filtered
    except sqlite.IntegrityError, e:
      if 'feed_xml' in str(e):
        db.rollback()
        raise FeedAlreadyExists
      else:
        db.rollback()
        raise UnknownError(str(e))
  finally:
    c.close()
예제 #11
0
파일: update.py 프로젝트: fazalmajid/temboz
def process_parsed_feed(db, c, f, feed_uid, feed_dupcheck=None, exempt=None):
  """Insert the entries from a feedparser parsed feed f in the database using
the cursor c for feed feed_uid.
feed_dupcheck and exempt override the per-feed flags of the same name when
not None; when None they are read from the feed's fm_feeds row.
Returns a tuple (number of items added unread, number of filtered items)"""
  num_added = 0
  num_filtered = 0
  filters.load_rules(db, c)
  # check if duplicate title checking is in effect
  if feed_dupcheck is None:
    c.execute("select feed_dupcheck from fm_feeds where feed_uid=?",
              [feed_uid])
    feed_dupcheck = bool(c.fetchone()[0])
  # check if the feed is exempt from filtering
  if exempt is None:
    c.execute("select feed_exempt from fm_feeds where feed_uid=?", [feed_uid])
    exempt = bool(c.fetchone()[0])
  # the Radio convention is reverse chronological order
  f['items'].reverse()
  for item in f['items']:
    try:
      normalize.normalize(item, f)
    except:
      # a malformed item should not abort processing of the whole feed
      util.print_stack()
      continue
    # evaluate the FilteringRules
    skip, rule = filters.evaluate_rules(item, f, feed_uid, exempt)
    filtered_by = None
    if skip:
      # -2 is the item_rating value stored for filtered-out items
      skip = -2
      if type(rule.uid) == int:
        filtered_by = rule.uid
      else:
        # XXX clunky convention for feed_rule, but that should disappear
        # XXX eventually
        filtered_by = 0
    title   = item['title']
    link    = item['link']
    guid    = item['id']
    author = item['author']
    created = item['created']
    modified = item['modified']
    if not modified:
      modified = None
    content = item['content']
    # check if the item already exists, using the GUID as key
    # but cache all seen GUIDs in a dictionary first, since most articles are
    # existing ones and we can save a database query this way
    if feed_uid in feed_guid_cache and guid in feed_guid_cache[feed_uid]:
      # existing entry and we've seen it before in this process instance
      # update the time stamp to prevent premature garbage-collection
      # in prune_feed_guid_cache
      feed_guid_cache.setdefault(feed_uid, dict())[guid] = time.time()
      continue
    else:
      feed_guid_cache.setdefault(feed_uid, dict())[guid] = time.time()
    # not seen yet, it may or may not be a duplicate, we have to find out the
    # hard way
    c.execute("""select item_uid, item_link,
    item_loaded, item_created, item_modified,
    item_md5hex, item_title, item_content, item_creator
    from fm_items where item_feed_uid=? and item_guid=?""",
              [feed_uid, guid])
    l = c.fetchall()
    # unknown GUID, but title/link duplicate checking may be in effect
    if not l:
      if feed_dupcheck:
        c.execute("""select count(*) from fm_items
        where item_feed_uid=? and (item_title=? or item_link=?)""",
                  [feed_uid, title, link])
        # a truthy l below means "treat as duplicate, do not insert"
        l = bool(c.fetchone()[0])
        if l:
          print >> param.activity, 'DUPLICATE TITLE', title
      # XXX Runt items (see normalize.py) are almost always spurious, we just
      # XXX skip them, although we may revisit this decision in the future
      if not l and item.get('RUNT', False):
        print >> param.activity, 'RUNT ITEM', item
        l = True
    # GUID already exists, this is a change
    else:
      assert len(l) == 1
      (item_uid, item_link, item_loaded, item_created, item_modified,
       item_md5hex, item_title, item_content, item_creator) = l[0]
      # if this is a feed without timestamps, use our timestamp to determine
      # the oldest item in the feed XML file
      if 'oldest' in f and f['oldest'] == '1970-01-01 00:00:00':
        if 'oldest_ts' not in f:
          f['oldest_ts'] = item_created
        else:
          f['oldest_ts'] = min(f['oldest_ts'], item_created)
      # XXX update item here
      # XXX update tags if required
    # GUID doesn't exist yet, insert it
    if not l:
      # finally, dereference the URL to get rid of annoying tracking servers
      # like feedburner, but only do this once to avoid wasting bandwidth
      link = normalize.dereference(link)
      try:
        c.execute("""insert into fm_items (item_feed_uid, item_guid,
        item_created,   item_modified, item_link, item_md5hex,
        item_title, item_content, item_creator, item_rating, item_rule_uid)
        values
        (?, ?, julianday(?), julianday(?), ?, ?, ?, ?, ?, ?, ?)""",
                  [feed_uid, guid, created, modified, link,
                   hashlib.md5(content).hexdigest(),
                   title, content, author, skip, filtered_by])
        # if we have tags, insert them
        # note: feedparser.py handles 'category' as a special case, so we
        # need to work around that to get to the data
        if item['item_tags']:
          c.execute("""select item_uid
          from fm_items where item_feed_uid=? and item_guid=?""",
                    [feed_uid, guid])
          item_uid = c.fetchone()[0]
          for tag in item['item_tags']:
            c.execute("""insert or ignore into fm_tags (tag_name, tag_item_uid)
            values (?, ?)""", [tag, item_uid])
        if skip:
          num_filtered += 1
          print >> param.activity, 'SKIP', title, rule
        else:
          num_added += 1
          print >> param.activity, ' ' * 4, title
      except:
        # log and move on to the next item rather than losing the batch
        util.print_stack(['c', 'f'])
        continue
  # update timestamp of the oldest item still in the feed file
  if 'oldest' in f and f['oldest'] != '9999-99-99 99:99:99':
    if f['oldest'] == '1970-01-01 00:00:00' and 'oldest_ts' in f:
      c.execute("update fm_feeds set feed_oldest=? where feed_uid=?",
                [f['oldest_ts'], feed_uid])
    else:
      c.execute("""update fm_feeds set feed_oldest=julianday(?)
      where feed_uid=?""", [f['oldest'], feed_uid])
  
  return (num_added, num_filtered)