def get_new_talks(self):
    """
    Fetch the TED SD and HD RSS feeds and return the merged talks.

    Returns an iterator of talk dicts, one per unique title, each shaped
    {title:, author:, thumb:, date:, duration:, link:} as produced by
    self.get_talk_details(). HD is processed last so an HD entry
    overwrites the SD entry for the same title.
    """
    sd_rss_url = "http://feeds.feedburner.com/tedtalks_video"
    hd_rss_url = "http://feeds.feedburner.com/TedtalksHD"
    # Prefer HD, but get SD if that's all there is (my friends).
    # Order matters: HD comes second so it wins in talksByTitle.
    rss_urls = [sd_rss_url, hd_rss_url]

    document_fetchers = []
    if do_multi_threading:
        # One worker per feed so the downloads actually run concurrently
        # (processes=1 would serialize the apply_async calls).
        pool = Pool(processes=len(rss_urls))
        for url in rss_urls:
            result = pool.apply_async(get_document, [url])
            # Bind 'result' as a default argument. A plain closure would
            # capture the loop variable late, making every fetcher read
            # the result of the LAST feed only.
            document_fetchers.append(lambda result=result: result.get(30))
    else:
        for url in rss_urls:
            # Same late-binding fix for 'url': without the default-arg
            # binding, both fetchers would download the last URL.
            document_fetchers.append(lambda url=url: get_document(url))

    talksByTitle = {}
    for document_fetcher in document_fetchers:
        rss = document_fetcher()
        for item in fromstring(rss).findall('channel/item'):
            talk = self.get_talk_details(item)
            talksByTitle[talk['title']] = talk

    if do_multi_threading:
        # Using close()/join() here logged at app exit (finalization):
        #   ERROR: Error Type: <type 'exceptions.OSError'>
        #   ERROR: Error Contents: [Errno 3] No such process
        # terminate() (the public API, unlike the previous _terminate)
        # avoids that.
        pool.terminate()
    return talksByTitle.itervalues()