def print_rss_item(item):
    """Print the main fields of one parsed RSS item to stdout (debug helper).

    Writes, one per line: the title, the pubdate, the HTML-unescaped guid,
    the ``dc:creator`` and the ``dc:date`` of *item*.

    Args:
        item: parsed RSS ``<item>`` node. It must expose ``title``,
            ``pubdate`` and ``guid`` attributes and a ``find(name)`` method,
            each yielding an object with a ``.string`` attribute
            (presumably a BeautifulSoup tag -- confirm against the caller).

    Raises:
        AttributeError: if one of the lookups yields an object without a
            ``.string`` attribute (for example ``None``).
    """
    # Every print below receives ONE pre-formatted string, so the call is
    # valid both as a Python 2 print statement and as the Python 3 print
    # function.  The double space after the colon reproduces the output of
    # the original ``print "label: ", value`` statements.
    print("rss_title:  %s" % (item.title.string,))
    # item.link.next is not printed: it holds the same value as the guid.
    # item.description.string is deliberately left out of the output.
    print("rss_pubdate:  %s" % (item.pubdate.string,))
    # NOTE(review): HTMLParser.unescape() no longer exists on Python >= 3.9;
    # html.unescape() is the replacement if this file is ported.
    print("guid.string:  %s" % (HTMLParser().unescape(item.guid.string),))
    print("rss_creator:  %s" % (item.find("dc:creator").string,))
    print("rss_date:  %s" % (item.find("dc:date").string,))


if __name__ == '__main__':
    # Index / collection maintenance calls, left disabled as in the original:
    # connector.drop_indexes()
    # connector.remove()
    # connector.ensure_indexes()
    print(connector.index_information())

    rss_url = "http://info.valladolid.es/web/culturayturismo/canalrss/-/journal/rss/10167/RSS-AGENDA"
    process_rss(rss_url)

    # Print everything connector.find() yields.  The loop variable is named
    # ``doc`` rather than ``json`` so it cannot shadow the stdlib module.
    for doc in connector.find():
        print(doc)
        # print(doc._id)
        # utils.print_item_dict(doc)