def __init__(self, repository, name=None, initial_values = ()):
        """Initialize an instance.

        If a file already exists at this collection's storage path, the
        collection is loaded from it via ``self.load`` and ``initial_values``
        is ignored.  A failure to load is logged and otherwise swallowed,
        leaving the collection empty.  If there is no such file, the
        collection is seeded from ``initial_values``; values that are neither
        ``Document`` nor ``Collection`` instances are silently skipped.

        :param repository: the repository we're running in
        :type repository: ``repository.Repository``
        :param name: unique ID to use as the collection name.  If not specified, one is generated.
        :type name: string
        :param initial_values: a sequence of values to start off the collection with.  Defaults to empty list.
        :type initial_values: sequence of either Document or Collection instances
        """
        self.__xitems = {}                # maps id or name to element.  Elements may be documents or other collections.
        self.scantime = repository.mod_time()   # last time looked at
        self.repository = repository
        self.id = name or create_new_id()
        self.storage_path = os.path.join(repository.collections_folder(), self.id)
        if os.path.exists(self.storage_path):
            # Loading is best-effort: any failure is logged, not propagated.
            try:
                fp = open(self.storage_path, 'rb')
                try:
                    self.load(fp)
                finally:
                    # close the file even when load() raises, so a corrupt
                    # collection file doesn't leak an open handle
                    fp.close()
            except:
                note(2, "Couldn't load collection %s:\n%s", self.id,
                     ''.join(traceback.format_exception(*sys.exc_info())))
        elif initial_values:
            for item in initial_values:
                if isinstance(item, Document):
                    # documents are keyed by their id
                    self.__xitems[item.id] = DocumentPointer(item)
                elif isinstance(item, Collection):
                    # sub-collections are keyed by their name
                    self.__xitems[item.name()] = CollectionPointer(item.id, item)
def _scan_rss_sites(repo):
    """Poll the configured RSS sites forever, adding new entries to ``repo``.

    After a one-time setup (loading the ``UploadDocument`` extension when a
    repository is given, reading the scan period and startup delay, importing
    ``feedparser``), each pass of the loop:

    1. re-reads the configuration and builds the site list from the
       ``rss-sites`` setting plus ``_ADDED_SITES``, minus ``_REMOVED_SITES``;
    2. fetches the feeds for those sites;
    3. for every entry accepted by ``process_entry``, uploads it through the
       ``UploadDocument`` extension unless a document with the same ``rss-id``
       is already in the repository;
    4. expires old ``RSSReader`` documents: those with an ``activity`` file in
       their folder are tagged ``RSSReader/_noexpire_``, the rest are deleted;
    5. sleeps for the scan period (passed to ``time.sleep``, so seconds).

    A pass that fails is logged and the loop still sleeps before retrying,
    so a persistent error does not turn into a tight re-fetch loop.

    :param repo: the repository to add documents to.  If false, entries are
        only logged and nothing is queried, uploaded or expired.
    """

    global _ADDED_SITES, _REMOVED_SITES

    try:
        from uplib.plibUtil import configurator, note, write_metadata, create_new_id
    except:
        # ``note`` itself is what failed to import, so it can't be used to
        # report the problem; fall back to stderr.
        sys.stderr.write("RSSReader:  exception starting RSS scan thread:\n%s" %
                         ''.join(traceback.format_exception(*sys.exc_info())))
        return

    try:
        from uplib.extensions import find_and_load_extension
        conf = configurator.default_configurator()

        if repo:
            sys_inits_path = os.path.join(conf.get('uplib-lib'), 'site-extensions')
            repo_inits_path = os.path.join(repo.root(), "overhead", "extensions", "active")
            upload_m = find_and_load_extension("UploadDocument", "%s|%s" % (repo_inits_path, sys_inits_path), None, True)
            if not upload_m:
                note(0, "Can't load UploadDocument extension!")
                # the SystemExit raised here is caught by the bare except
                # below, which logs it and returns from this function
                sys.exit(1)
            else:
                note("UploadDocument extension is %s", upload_m)

        scan_period = conf.get_int("rss-scan-period", 60 * 2)
        startup_delay = conf.get_int("rss-startup-delay", 0)
        del conf

        import feedparser

        if startup_delay > 0:
            note(3, "startup delay is %d", startup_delay)
            time.sleep(startup_delay)

    except:
        note(0, "RSSReader:  exception starting RSS scan thread:\n%s",
             ''.join(traceback.format_exception(*sys.exc_info())))
        return

    # Raw "rss-sites" value seen on the previous pass; -1 means "no pass yet".
    # Kept separate from the processed site list so the change check below
    # compares like with like.
    last_configured_sites = -1
    while True:
        try:
            try:
                conf = configurator()       # re-read uplibrc file
                configured_sites = conf.get("rss-sites")
                if last_configured_sites == -1 or (last_configured_sites != configured_sites):
                    note(2, "rss_sites are %s", configured_sites)
                last_configured_sites = configured_sites
                scan_period = conf.get_int("rss-scan-period", scan_period)
                expiration_period = conf.get_int("rss-expiration-period", 30 * 24 * 60 * 60)        # 30 days

                if configured_sites:
                    rss_sites = configured_sites.split() + _ADDED_SITES
                else:
                    rss_sites = _ADDED_SITES[:]
                # drop every occurrence of a removed site, not just the first
                rss_sites = [site for site in rss_sites if site not in _REMOVED_SITES]

                if rss_sites:
                    feeds = []
                    for site in rss_sites:
                        if site.startswith("feed:"):
                            feeds.append(feedparser.parse(site))
                        elif site.startswith("http:") or site.startswith("https:"):
                            feeds += find_feeds(site)
                    note("feeds are:\n%s", [(x.feed.title, x.href, len(x.entries)) for x in feeds])
                    for feed in feeds:
                        note("RSSReader:  %s: %s entries in feed %s", time.ctime(), len(feed.entries), feed.feed.title)
                        for entry in feed.entries:
                            d = process_entry(entry)
                            if not d:
                                continue
                            if not repo:
                                # no repository: just report what was found
                                note("RSSReader:  %s:  %s (%s)\n    %s", time.ctime(), repr(d.get("title")), d.get("date"), d.get("summary"))
                                continue
                            rss_id = d.get("rss-id")
                            hits = repo.do_query('+rss-id:"%s"' % rss_id)
                            if hits:
                                # already in repo
                                continue
                            response = FakeResponse(repo)
                            mdoutput = StringIO.StringIO()
                            write_metadata(mdoutput, d)
                            md = mdoutput.getvalue()
                            mdoutput.close()
                            upload_m.add(repo, response, { 'URL': d.get("original-url"),
                                                           'wait': "true",
                                                           'no-redirect': "true",
                                                           'metadata': md,
                                                           'md-categories': "RSSReader/%s" % feed.feed.title,
                                                           })
                            # wait for the upload's thread (if any) to finish
                            # before reporting its result
                            if response.thread:
                                while response.thread.isAlive():
                                    response.thread.join(1.0)
                            note("RSSReader:  %s:  %s (%s: %s)", time.ctime(), repr(d.get("title")), response.code, response.message)

                # now do expiries (only meaningful when there is a repository)
                if repo:
                    old_id = create_new_id(time.time() - expiration_period)[:-5]
                    hits = repo.do_query("categories:RSSReader AND id:[00000-00-0000-000 TO %s] AND NOT categories:RSSReader/_noexpire_" % old_id)
                    for score, doc in hits:
                        # check to see if the user has looked at it
                        if os.path.exists(os.path.join(doc.folder(), "activity")):
                            doc.add_category("RSSReader/_noexpire_", True)
                        # and if not, remove it
                        else:
                            repo.delete_document(doc.id)
            except KeyboardInterrupt:
                # handled by the outer handler
                raise
            except:
                note(0, "RSSReader:  %s", ''.join(traceback.format_exception(*sys.exc_info())))
            # Sleep after failed passes too, so a persistent error is
            # retried once per scan period rather than continuously.
            time.sleep(scan_period)
        except KeyboardInterrupt:
            if _IGNORE_KEYBOARD_INTERRUPTS:
                note(0, "RSSReader:  %s", ''.join(traceback.format_exception(*sys.exc_info())))
            else:
                sys.exit(0)