def __init__(self):
    """Initialise the scraper: build and register its Detail record, reset
    per-chart optional attributes, then run the subclass's init() and parse().

    Subclasses must provide source_id, description and have_extra as
    attributes and implement init() and parse().
    """
    # prettyName is optional on subclasses; default it so Detail() below
    # never fails on a missing attribute.
    try:
        self.prettyName
    except AttributeError:
        self.prettyName = None
    self.__setStorage()
    self.details = DetailItem(
        Detail(id=self.source_id,
               name=self.prettyName,
               description=self.description,
               have_extra=self.have_extra))
    chartCache.shoveDetails(self.details, self.is_chart)
    # BUGFIX: removed the no-op "self.have_extra = self.have_extra".
    # Chart specific optional attributes
    self.geo = None
    self.extra = None
    self.genre = None
    self.source_id = self.details.get('id')
    self.cache_id = "%scacheControl" % self.source_id
    self.default = 0
    self.init()
    self.parse()
def __init__(self):
    """Register this scraper's Detail record and kick off the scrape.

    Requires source_id, description and have_extra on the subclass; calls
    the subclass's init() and parse() at the end.
    """
    # Only prettyName is optional; guard against it being unset.
    try:
        self.prettyName
    except AttributeError:
        self.prettyName = None
    self.__setStorage()
    self.details = DetailItem(Detail(
        id=self.source_id,
        name=self.prettyName,
        description=self.description,
        have_extra=self.have_extra))
    chartCache.shoveDetails(self.details, self.is_chart)
    # BUGFIX: dropped the self-assignment "self.have_extra = self.have_extra"
    # (a no-op) and the stray trailing semicolon.
    # Chart specific optional attributes
    self.geo = None
    self.extra = None
    self.genre = None
    self.source_id = self.details.get('id')
    self.cache_id = "%scacheControl" % self.source_id
    self.default = 0
    self.init()
    self.parse()
class DjShopSpider(CrawlSpider): name = "djshop.de" allowed_domains = ["djshop.de"] baseUrl = "http://www.djshop.de/" baseCharts = [ "%sDownload-Charts/ex/s~mp3,u~charts/xe/Download-Charts.html" % baseUrl, "%sVinyl-Charts/ex/s~charts/xe/charts.html" % baseUrl ] chartTypes = [{"unpretty" : "MP3 Downloads Charts", "pretty" : "Digital Charts"}, \ {"unpretty" : "Charts Style Charts", "pretty" : "Vinyl Charts"}, \ {"unpretty" : "Charts Top 100", "pretty" : "Top 100"}, \ {"unpretty" : "Charts International Charts", "pretty" : "International Charts"}] # Expires in 2 days expires = chartCache.timedeltaUntilDays(2) cacheControl = chartCache.setCacheControl(expires) source_id = "djshop.de" source_name = "djShop.de" description = "Updated daily with what's currently hot on the electronic scene." have_extra = True details = DetailItem( Detail(id=source_id, description=description, name=source_name, have_extra=have_extra)) def __init__(self, name=None, **kwargs): super(DjShopSpider, self).__init__() chartCache.shoveDetails(self.details) self.get_chart_urls() def get_chart_urls(self): for chart in self.baseCharts: req = urllib2.Request(chart) hxs = HtmlXPathSelector(text=urllib2.urlopen(req).read()) try: navBox = hxs.select('//div[@id="leftColumn"]') navList = navBox.select('//ul[@class="navUL"]/li') for index, link in enumerate(navList): if not "Label Charts" in link.select( 'a/text()').extract()[0].strip(): self.start_urls.append( "http://www.djshop.de" + link.select('a/@href').extract()[0].strip()) except Exception, e: print e
class Chart(object): # This is backward compatible, Types must be singular __types = {'album' : 'Album','track' : 'Track','artist' : 'Artist'} __outputMsgError = "Empty chart, not storing:" __outputMsgOk = "Saving chart:" def __init__(self): try: self.prettyName except AttributeError: self.prettyName = None self.__setStorage() self.details = DetailItem(Detail( id = self.source_id, name = self.prettyName, description = self.description, have_extra = self.have_extra ) ); chartCache.shoveDetails(self.details, self.is_chart) self.have_extra = self.have_extra #Chart specific optional attributes self.geo = None self.extra = None self.genre = None self.source_id = self.details.get('id') self.cache_id = "%scacheControl" % self.source_id self.default = 0 self.init() self.parse() def __setStorage(self): try: if self.is_chart: self.storage = chartCache.storage else: self.storage = chartCache.newreleases except AttributeError: self.is_chart = True self.storage = chartCache.storage def __getCacheControl(self): self.cacheControl = chartCache.setCacheControl(self.expires) def __createChartItem(self): try: chart = ChartItem( id = slugify(self.chart_id), name = self.chart_name, display_name = self.display_name, origin = self.origin, type = self.chart_type, default = self.default, source = self.source_id, date = self.cacheControl.get("Date-Modified"), expires = self.cacheControl.get("Date-Expires"), maxage = self.cacheControl.get("Max-Age"), list = self.chart_list ) except AttributeError: print "ChartItem is missing required attributes!" 
raise if self.have_extra : if self.geo is not None: chart['geo'] = self.geo if self.genre is not None: chart['genre'] = self.genre if self.extra is not None: chart['extra'] = self.extra return chart def __updateCache(self, metadata, chart): data = self.storage.get(self.source_id, {}) data[self.chart_id] = metadata self.storage[self.source_id] = data self.storage[self.source_id+self.chart_id] = dict(chart) self.storage[self.cache_id] = dict(self.cacheControl) def __createMetadata(self, chart): # metadata is the chart item minus the actual list plus a size metadata_keys = filter(lambda k: k != 'list', chart) metadata = { key: chart[key] for key in metadata_keys } metadata['size'] = len(self.chart_list) return metadata def init(self): raise NotImplementedError( "Scraper needs to implement this!") def parse(self): raise NotImplementedError( "Scraper needs to implement this!") def setChartId(self, id): self.chart_id = id def setChartName(self, name): self.chart_name = name.title() def setChartOrigin(self, origin): self.origin = origin def setChartDisplayName(self, name): self.display_name = name def setIsDefault(self, default): self.default = default def setChartGenre(self, genre): self.genre = genre def setChartExtra(self, extra): self.extra = extra def setChartGeo(self, geo): self.geo = geo; def setChartType(self, type): self.chart_type = self.__types.get(type.lower()) def setExpiresInDays(self, day, hour = 1): self.expires = chartCache.timedeltaUntilDays(day, hour) self.__getCacheControl() def setExpiresOnDay(self, day, hour = 1): self.expires = chartCache.timedeltaUntilWeekday(day, hour) self.__getCacheControl() def getJsonContent(self, url): try: request = urllib2.Request(url) response = urllib2.urlopen(request) content = response.read().decode('utf-8') return json.loads(content) except Exception,e: print "Error: %s" % e return {}
class BillboardSpider(CrawlSpider):
    """Scrapes the weekly Billboard charts, following pagination per chart."""
    name = "billboard.com"
    allowed_domains = ["billboard.com"]
    start_urls = [
        # this is the list of all the charts
        "http://www.billboard.com/charts"
    ]
    # xpath to retrieve the urls to specific charts
    chart_xpath = '//span[@class="field-content"]/a'
    # the xpath to the pagination links
    next_page_xpath = '//div[@class="chart_pager_bottom"]/div/ul/li[@class="pager-item"]/a/@href'
    # we only need one rule, and that is to follow
    # the links from the charts list page
    rules = [
        Rule(SgmlLinkExtractor(allow=['/charts/\w+'],
                               restrict_xpaths=chart_xpath),
             callback='parse_chart', follow=True)
    ]
    expires = chartCache.timedeltaUntilWeekday(EXPIRES_DAY, EXPIRES_HOUR)
    cacheControl = chartCache.setCacheControl(expires)
    source_id = "billboard"
    description = "The week's top-selling and most played albums and tracks across all genres, ranked by sales data and radio airtime as compiled by Nielsen."
    details = DetailItem(Detail(
        id=source_id,
        description=description,
    ))

    def __init__(self, name=None, **kwargs):
        super(BillboardSpider, self).__init__()
        chartCache.shoveDetails(self.details)

    def parse_chart(self, response):
        """Parse one chart's first page and chain through its pagination."""
        hxs = HtmlXPathSelector(response)
        chart_name = hxs.select(
            '//h1[@id="page-title"]/text()').extract()[0].strip()
        #chart_type = hxs.select('//*[@id="chart-list"]/div[@id="chart-type-fb"]/text()').extract()[0].strip()
        # get a list of pages; drop javascript links and queue the rest
        next_pages = hxs.select(self.next_page_xpath).extract()
        next_pages = deque(filter(lambda e: 'javascript' not in e, next_pages))
        # Correct the grammar to fit our expectations
        if chart_name == 'Germany Songs':
            chart_name = 'German Tracks'
        chart = ChartItem()
        chart['name'] = chart_name
        chart['display_name'] = chart_name if chart_name else "Top Overall"
        chart['origin'] = response.url
        chart['source'] = 'billboard'
        chart['id'] = slugify(chart_name)
        chart['list'] = []
        chart['date'] = self.cacheControl.get("Date-Modified")
        chart['expires'] = self.cacheControl.get("Date-Expires")
        chart['maxage'] = self.cacheControl.get("Max-Age")
        # lets figure out the content type from keywords in the chart name
        lower_name = chart_name.lower()
        if 'songs' in lower_name:
            chart['type'] = 'Track'
            typeItem = SingleTrackItem()
        elif 'albums' in lower_name or \
                any(s in lower_name for s in
                    ['soundtracks', 'billboard 200', 'tastemakers']):
            # BUGFIX: containment test was reversed (lower_name in s), which
            # only matched when the chart name equalled the keyword exactly.
            chart['type'] = 'Album'
            typeItem = SingleAlbumItem()
        elif any(s in lower_name for s in ['social 50', 'uncharted']):
            chart['type'] = 'Artist'
            typeItem = SingleArtistItem()
        else:
            chart['type'] = 'Track'
            typeItem = SingleTrackItem()
        if (chart['id'] == settings["BILLBOARD_DEFAULT_ALBUMCHART"] or
                chart['id'] == settings["BILLBOARD_DEFAULT_TRACKCHART"]):
            chart['default'] = 1
        chart = self.parse_items(hxs, chart, typeItem)
        # ok, we've prepped the chart container, lets start getting the pages
        if len(next_pages) > 0:
            next_page = next_pages.popleft()
            request = Request('http://www.billboard.com' + next_page,
                              callback=lambda r: self.parse_page(
                                  r, chart, next_pages, typeItem))
            yield request
        else:
            # BUGFIX: a chart without pagination was previously never
            # yielded (charts were only emitted from parse_page).
            yield chart

    def parse_items(self, hxs, chart, typeItem):
        """Append every chart entry on this page to chart['list']."""
        chart_list = []
        for item in hxs.select(
                '//div[contains(@class,"chart_listing")]/article'):
            loader = XPathItemLoader(typeItem, selector=item)
            loader.add_xpath(
                'rank',
                'header/span[contains(@class, "chart_position")]/text()')
            # ptitle yields the title for the type, so just set the title to
            # whatever the charttype is.
            if 'artist' in chart['type'].lower():
                loader.add_xpath('artist',
                                 'header/p[@class="chart_info"]/a/text()')
            else:
                loader.add_xpath(chart['type'].lower(), 'header/h1/text()')
                loader.add_xpath('artist',
                                 'header/p[@class="chart_info"]/a/text()')
                loader.add_xpath('album',
                                 'header/p[@class="chart_info"]/text()')
            single = loader.load_item()
            chart_list.append(dict(single))
        chart['list'] += chart_list
        return chart

    def parse_page(self, response, chart, next_pages, typeItem):
        """Parse a follow-up page; yield the chart when pages are exhausted."""
        hxs = HtmlXPathSelector(response)
        chart = self.parse_items(hxs, chart, typeItem)
        if len(next_pages) == 0:
            log.msg("Done with %s" % (chart['name']))
            yield chart
        else:
            next_page = next_pages.popleft()
            log.msg("Starting nextpage (%s) of %s - %s left" %
                    (next_page, chart['name'], len(next_pages)))
            request = Request('http://www.billboard.com' + next_page,
                              callback=lambda r: self.parse_page(
                                  r, chart, next_pages, typeItem))
            yield request
class Chart(object): # This is backward compatible, Types must be singular __types = {'album': 'Album', 'track': 'Track', 'artist': 'Artist'} __outputMsgError = "Empty chart, not storing:" __outputMsgOk = "Saving chart:" def __init__(self): try: self.prettyName except AttributeError: self.prettyName = None self.__setStorage() self.details = DetailItem( Detail(id=self.source_id, name=self.prettyName, description=self.description, have_extra=self.have_extra)) chartCache.shoveDetails(self.details, self.is_chart) self.have_extra = self.have_extra #Chart specific optional attributes self.geo = None self.extra = None self.genre = None self.source_id = self.details.get('id') self.cache_id = "%scacheControl" % self.source_id self.default = 0 self.init() self.parse() def __setStorage(self): try: if self.is_chart: self.storage = chartCache.storage else: self.storage = chartCache.newreleases except AttributeError: self.is_chart = True self.storage = chartCache.storage def __getCacheControl(self): self.cacheControl = chartCache.setCacheControl(self.expires) def __createChartItem(self): try: chart = ChartItem(id=slugify(self.chart_id), name=self.chart_name, display_name=self.display_name, origin=self.origin, type=self.chart_type, default=self.default, source=self.source_id, date=self.cacheControl.get("Date-Modified"), expires=self.cacheControl.get("Date-Expires"), maxage=self.cacheControl.get("Max-Age"), list=self.chart_list) except AttributeError: print "ChartItem is missing required attributes!" 
raise if self.have_extra: if self.geo is not None: chart['geo'] = self.geo if self.genre is not None: chart['genre'] = self.genre if self.extra is not None: chart['extra'] = self.extra return chart def __updateCache(self, metadata, chart): data = self.storage.get(self.source_id, {}) data[self.chart_id] = metadata self.storage[self.source_id] = data self.storage[self.source_id + self.chart_id] = dict(chart) self.storage[self.cache_id] = dict(self.cacheControl) def __createMetadata(self, chart): # metadata is the chart item minus the actual list plus a size metadata_keys = filter(lambda k: k != 'list', chart) metadata = {key: chart[key] for key in metadata_keys} metadata['size'] = len(self.chart_list) return metadata def init(self): raise NotImplementedError("Scraper needs to implement this!") def parse(self): raise NotImplementedError("Scraper needs to implement this!") def setChartId(self, id): self.chart_id = id def setChartName(self, name): self.chart_name = name.title() def setChartOrigin(self, origin): self.origin = origin def setChartDisplayName(self, name): self.display_name = name def setIsDefault(self, default): self.default = default def setChartGenre(self, genre): self.genre = genre def setChartExtra(self, extra): self.extra = extra def setChartGeo(self, geo): self.geo = geo def setChartType(self, type): self.chart_type = self.__types.get(type.lower()) def setExpiresInDays(self, day, hour=1): self.expires = chartCache.timedeltaUntilDays(day, hour) self.__getCacheControl() def setExpiresOnDay(self, day, hour=1): self.expires = chartCache.timedeltaUntilWeekday(day, hour) self.__getCacheControl() def getJsonContent(self, url): try: request = urllib2.Request(url) response = urllib2.urlopen(request) content = response.read().decode('utf-8') return json.loads(content) except Exception, e: print "Error: %s" % e return {}
class HNHHSpider(CrawlSpider):
    """Scrapes the HotNewHipHop Top 100 song and mixtape charts."""
    name = "hotnewhiphop.com"
    allowed_domains = ["hotnewhiphop.com"]
    start_urls = [
        # this is the list of all the charts
        "http://www.hotnewhiphop.com/top100/song/mainstream/month/1/",
        "http://www.hotnewhiphop.com/top100/song/mainstream/alltime/1",
        "http://www.hotnewhiphop.com/top100/song/upcoming/month/1",
        "http://www.hotnewhiphop.com/top100/song/upcoming/alltime/1",
        "http://www.hotnewhiphop.com/top100/mixtape/upcoming/month/1",
        "http://www.hotnewhiphop.com/top100/mixtape/upcoming/alltime/1",
        "http://www.hotnewhiphop.com/top100/mixtape/mainstream/month/1",
        "http://www.hotnewhiphop.com/top100/mixtape/mainstream/alltime/1",
    ]
    source_id = "hotnewhiphop"
    source_name = "HotNewHipHop"
    description = "Real hip-hop fans collaborates to create HotNewHiphops's daily updated charts."
    have_extra = True
    details = DetailItem(
        Detail(id=source_id,
               description=description,
               name=source_name,
               have_extra=have_extra))

    def __init__(self, name=None, **kwargs):
        super(HNHHSpider, self).__init__()
        chartCache.shoveDetails(self.details)

    def parse(self, response):
        """Build one ChartItem from a chart page; chart kind and 'extra'
        label are derived from keywords in the url."""
        hxs = HtmlXPathSelector(response)
        chart_name = "Top 100"
        try:
            chart_type = hxs.select(
                '//*[@class="tab-right-active"]/text()').extract()[0].strip()
        except IndexError:
            chart_type = hxs.select(
                '//*[@class="tab-left-active"]/text()').extract()[0].strip()
        # ROBUSTNESS: default extra so it is always bound even for a url
        # matching neither keyword (all current start_urls match one).
        extra = ""
        if "upcoming" in response.url:
            extra = "Upcoming"
        if "mainstream" in response.url:
            extra = "Mainstream"
        if "alltime" in response.url:
            # alltime charts fold the period label into the name instead
            chart_name += " " + extra
            extra = "Alltime"
        id = chart_name + extra + chart_type
        chart = ChartItem()
        chart['name'] = chart_name + " " + chart_type
        chart[
            'display_name'] = chart["name"] if chart["name"] else "Top Overall"
        chart['origin'] = response.url
        chart['source'] = 'hotnewhiphop'
        chart['id'] = slugify(id)
        chart['list'] = []
        chart['extra'] = extra
        expires = chartCache.timedeltaUntilDays(1)
        cacheControl = chartCache.setCacheControl(expires)
        chart['date'] = cacheControl.get("Date-Modified")
        chart['expires'] = cacheControl.get("Date-Expires")
        chart['maxage'] = cacheControl.get("Max-Age")
        # BUGFIX: removed dead "loader = SingleUrl*Item()" assignments here;
        # the loader is always rebuilt per item inside the loop below.
        if "mixtape" in response.url:
            if extra == "Upcoming":
                chart['default'] = 1
            chart['type'] = "Album"
            urlKey = "url"
            url = "http://www.hotnewhiphop.com/ajax/api/getMixtape/"
        elif "song" in response.url:
            chart['type'] = "Track"
            # Later on, if we have a hnhh resolver, this url could be used to
            # get a valid mp3 stream.
            url = "hnhh://www.hotnewhiphop.com/ajax/api/getSong/"
            urlKey = "stream_url"
        else:
            log.msg("Error with %s" % (chart['name']))
            return
        chart_list = []
        rank = 0
        for item in hxs.select('//div[@class="newCell newCell2"]'):
            if chart['type'] == "Album":
                loader = XPathItemLoader(SingleUrlAlbumItem(), selector=item)
            if chart['type'] == "Track":
                loader = XPathItemLoader(SingleUrlTrackItem(), selector=item)
            loader.add_xpath(chart['type'].lower(),
                             'div[@class="centerBlock"]/h3/a/text()')
            loader.add_xpath('artist',
                             'div[@class="centerBlock"]/a/i/text()')
            loader.add_xpath(urlKey, 'div[@class="centerBlock"]/a/@href')
            single = loader.load_item()
            # Rewrite the scraped link into an API url keyed by the item id.
            single[urlKey] = url + urlparse(single[urlKey]).path.split(".")[1]
            rank += 1
            single['rank'] = rank
            chart_list.append(dict(single))
        log.msg("Done with %s" % (chart['name']))
        chart['list'] += chart_list
        return chart
class MetacriticSpider(CrawlSpider):
    """Scrapes critically acclaimed albums from metacritic.com.

    NOTE(review): the rules reference 'parse_page' and 'parse_new_releases'
    callbacks that are defined further down this class (not shown here).
    """
    name = "metacritic.com"
    allowed_domains = ["metacritic.com"]
    baseUrl = "http://www.metacritic.com"
    # xpaths used to navigate the genre menu, tab bar and result lists
    genre_nav_xpath = './/ul[@class="genre_nav"]/li'
    types_xpath = './/ul[contains(@class, "tabs")]/li'
    first_nav_xpath = './/ul[contains(@class, "nav_items")]/li'
    current_page_name_xpath = './/ul[contains(@class, "tabs")]/li/span[@class="active"]/span/text()'
    list_xpath = './/ol[contains(@class,"list_product_condensed")]/li'
    next_page_xpath = './/ul[@class="pages"]/li/a/@href'
    coming_soon_table_xpath = './/table[@class="musicTable"]/tr'
    coming_soon_artist_xpath = './/td[@class="artistName"]'
    coming_soon_album_xpath = './/td[@class="albumTitle"]/text()'
    start_urls = ["http://www.metacritic.com/music"]
    rules = [
        # genre chart pages
        Rule(SgmlLinkExtractor(allow=("albums/genre/\w+", ),
                               deny=(
                                   "music",
                                   "name",
                               ),
                               restrict_xpaths=(genre_nav_xpath, )),
             callback='parse_page',
             follow=True),
        # tab links that are not genre/sort variants
        Rule(SgmlLinkExtractor(
            deny=("albums/genre/\w+", "name", "music",
                  "coming-soon/(metascore|userscore|name|date)",
                  "new-releases/name"),
            restrict_xpaths=(types_xpath, )),
             callback='parse_new_releases',
             follow=True),
        # top-nav release-date / score listings
        Rule(SgmlLinkExtractor(allow=(
            "albums/release-date",
            "albums/score",
        ),
                               deny=(
                                   "feature",
                                   "artist",
                                   "/\w+/people",
                               ),
                               restrict_xpaths=(first_nav_xpath, )),
             callback='parse_new_releases',
             follow=True)
    ]
    # Expires in 1 day (the previous comment said 2 days but the code
    # passes 1 to timedeltaUntilDays)
    expires = chartCache.timedeltaUntilDays(1)
    cacheControl = chartCache.setCacheControl(expires)
    source_id = "metacritic"
    source_name = "Metacritic"
    description = "Critically acclaimed and noteworthy music."
    have_extra = True
    details = DetailItem(
        Detail(id=source_id,
               description=description,
               name=source_name,
               have_extra=have_extra))

    def __init__(self, name=None, **kwargs):
        super(MetacriticSpider, self).__init__()
        # registered in both stores (second call passes is_chart=False)
        chartCache.shoveDetails(self.details)
        chartCache.shoveDetails(self.details, False)

    def get_current_genre(self, hxs):
        """Return the genre-menu entry marked with a <span>, or None.

        NOTE(review): looks like the <span> marks the active entry — confirm
        against the live page markup.
        """
        navList = hxs.select(self.genre_nav_xpath)
        for index, item in enumerate(navList):
            if item.select('.//span'):
                return item.select('.//span/text()').extract()[0].strip()
        return None

    def get_current(self, hxs, chart):
        """Fill chart extra/name/display_name/id from the active tab labels;
        falls back to fixed labels on coming-soon pages where the xpath
        yields fewer than two entries."""
        try:
            active = hxs.select(self.current_page_name_xpath).extract()
            chart["extra"] = active[0].strip()
            chart["name"] = active[1].strip()
            chart["display_name"] = chart["name"]
            chart["id"] = slugify(chart["name"] + chart["extra"])
        except Exception, e:
            # broad catch: any scrape failure on a coming-soon page gets the
            # hard-coded labels; other pages are left untouched
            if "coming-soon" in chart["origin"]:
                chart["extra"] = "Coming Soon"
                chart["name"] = "By Date"
                chart["display_name"] = chart["name"]
                chart["id"] = slugify(chart["name"] + chart["extra"])
class ItunesSpider(BaseSpider): name = 'itunes.com' allowed_domains = ['itunes.com'] start_urls = get_feed_urls(settings['ITUNES_LIMIT']) source_id = "itunes" source_name = "iTunes" description = "Updated daily, browse what currently hot on Itunes. Includes albums, tracks by genre." have_extra = True details = DetailItem(Detail( id = source_id, description = description, name = source_name, have_extra = have_extra ) ); def __init__(self, name=None, **kwargs): super(ItunesSpider, self).__init__() chartCache.shoveDetails(self.details) def parse(self, response): try: feed = etree.fromstring(response.body) except etree.XMLSyntaxError: log.msg("Parse error, skipping: %s"%(response.url), loglevel=log.WARNING) return None if feed.tag == '{http://www.w3.org/2005/Atom}feed': return self.parse_atom(feed) elif feed.tag == 'rss': return self.parse_rss(feed, response.url) # Itunes is weird, but that we know. There's sometimes no information about this feed # in the response, so we need to construct Title and so forth # Also, its sometimes b0rked description filled with bs json def parse_rss(self, feed, url): genre_name = None feed_extra = None feed_type = "Album" geo = None genre = filter(lambda k: 'genre' in k, urlparser(url).path.split("/")) try : genre_name = get_genre( genre[0].split("=")[1] ) # geo in xpath is different ISO than in url. 
We want cc not xpath # geo = feed.xpath('.//channel/language')[0].text geo_re = re.compile("cc=(.*)(?=\/)") rGeo = geo_re.search(url) if rGeo != None: geo = rGeo.groups()[0] except IndexError : return if 'newreleases' in url : feed_extra = "New Album Releases" if 'justadded' in url : feed_extra = "Just Added Albums" if 'featuredalbums' in url: feed_extra = "Featured Albums" if feed_extra is None or genre_name is None or geo is None : return ns = { 'itms': 'http://phobos.apple.com/rss/1.0/modules/itms/' } entries = feed.xpath('.//channel/item') rank = 0 chart_list = [] for entry in entries: artist = entry.xpath('itms:artist', namespaces=ns)[0].text album = entry.xpath('itms:album', namespaces=ns)[0].text rank += 1 item = SingleAlbumItem() item['artist'] = artist item['album'] = album item['rank'] = rank chart_list.append( dict(item) ) chart = ChartItem() # Unique ids _id = url md5 = hashlib.md5() md5.update(_id) _id = md5.hexdigest() chart['id'] = _id chart['origin'] = url chart['genre'] = genre_name chart['geo'] = geo.lower() chart['name'] = genre_name chart['extra'] = feed_extra chart["newrls"] = True chart['type'] = feed_type chart['list'] = chart_list chart['source'] = 'itunes' # maxage is the last item scraped # Expires in 1 days expires = chartCache.timedeltaUntilDays(1) cacheControl = chartCache.setCacheControl(expires) chart['date'] = cacheControl.get("Date-Modified") chart['expires'] = cacheControl.get("Date-Expires") chart['maxage'] = cacheControl.get("Max-Age") if _id == settings["ITUNES_DEFAULT_NRCHART"]: chart['default'] = 1 return chart def parse_atom(self, feed): ns = {'ns': 'http://www.w3.org/2005/Atom', 'im': 'http://itunes.apple.com/rss'} try: _id = feed.xpath('/ns:feed/ns:id', namespaces=ns)[0].text _type = feed.xpath('/ns:feed/ns:entry/im:contentType/im:contentType', namespaces=ns)[0].attrib['term'] except IndexError: return if _type != "Album" and _type != "Track": return # skip playlists entries = feed.xpath('/ns:feed/ns:entry', namespaces=ns) 
chart_list = [] rank = 0 for entry in entries: title = entry.xpath('im:name', namespaces=ns)[0].text artist = entry.xpath('im:artist', namespaces=ns)[0].text if _type == "Album": album = title item = SingleAlbumItem() elif _type == "Track": album = entry.xpath('im:collection/im:name', namespaces=ns)[0].text item = SingleTrackItem() item['track'] = title rank += 1 item['artist'] = artist item['album'] = album item['rank'] = rank chart_list.append( dict(item) ) title = feed.xpath('ns:title', namespaces=ns)[0].text geo = None geo_re = re.compile("cc=([a-zA-Z]+)") rGeo = geo_re.search(_id) if rGeo != None: geo = rGeo.groups()[0] genre = None genre_re = re.compile("genre=(\d+)/") rGenre = genre_re.search(_id) if rGenre != None: genre = rGenre.groups()[0] if not genre is None: genre = get_genre(genre) origin = _id md5 = hashlib.md5() md5.update(_id) _id = md5.hexdigest() if geo is None: geo_s = origin.split("/") geo = geo_s chart = ChartItem() # Itunes expires tomorrow at 00am chart['id'] = _id chart['display_name'] = genre if genre else "Top Overall" chart['origin'] = origin chart['genre'] = genre chart['geo'] = geo chart['name'] = title chart['type'] = _type chart['list'] = chart_list chart['source'] = 'itunes' # maxage is the last item scraped expires = chartCache.timedeltaUntilDays(1) cacheControl = chartCache.setCacheControl(expires) chart['date'] = cacheControl.get("Date-Modified") chart['expires'] = cacheControl.get("Date-Expires") chart['maxage'] = cacheControl.get("Max-Age") if(_id == settings["ITUNES_DEFAULT_ALBUMCHART"] or _id == settings["ITUNES_DEFAULT_TRACKCHART"]): print "Found default" + _id chart['default'] = 1 return chart