def __init__(self):
    """Initialise the scraper: build and register its Detail record, reset
    per-chart optional attributes, then run the subclass's init() and parse().

    Subclasses must provide source_id, description and have_extra as
    attributes and implement init() and parse().
    """
    # prettyName is optional on subclasses; default it so Detail() below
    # never fails on a missing attribute.
    try:
        self.prettyName
    except AttributeError:
        self.prettyName = None
    self.__setStorage()
    self.details = DetailItem(
        Detail(id=self.source_id,
               name=self.prettyName,
               description=self.description,
               have_extra=self.have_extra))
    chartCache.shoveDetails(self.details, self.is_chart)
    # BUGFIX: removed the no-op "self.have_extra = self.have_extra".
    # Chart specific optional attributes
    self.geo = None
    self.extra = None
    self.genre = None
    self.source_id = self.details.get('id')
    self.cache_id = "%scacheControl" % self.source_id
    self.default = 0
    self.init()
    self.parse()
def __init__(self):
    """Register this scraper's Detail record and kick off the scrape.

    Requires source_id, description and have_extra on the subclass; calls
    the subclass's init() and parse() at the end.
    """
    # Only prettyName is optional; guard against it being unset.
    try:
        self.prettyName
    except AttributeError:
        self.prettyName = None
    self.__setStorage()
    self.details = DetailItem(Detail(
        id=self.source_id,
        name=self.prettyName,
        description=self.description,
        have_extra=self.have_extra))
    chartCache.shoveDetails(self.details, self.is_chart)
    # BUGFIX: dropped the self-assignment "self.have_extra = self.have_extra"
    # (a no-op) and the stray trailing semicolon.
    # Chart specific optional attributes
    self.geo = None
    self.extra = None
    self.genre = None
    self.source_id = self.details.get('id')
    self.cache_id = "%scacheControl" % self.source_id
    self.default = 0
    self.init()
    self.parse()
class DjShopSpider(CrawlSpider): name = "djshop.de" allowed_domains = ["djshop.de"] baseUrl = "http://www.djshop.de/" baseCharts = [ "%sDownload-Charts/ex/s~mp3,u~charts/xe/Download-Charts.html" % baseUrl, "%sVinyl-Charts/ex/s~charts/xe/charts.html" % baseUrl ] chartTypes = [{"unpretty" : "MP3 Downloads Charts", "pretty" : "Digital Charts"}, \ {"unpretty" : "Charts Style Charts", "pretty" : "Vinyl Charts"}, \ {"unpretty" : "Charts Top 100", "pretty" : "Top 100"}, \ {"unpretty" : "Charts International Charts", "pretty" : "International Charts"}] # Expires in 2 days expires = chartCache.timedeltaUntilDays(2) cacheControl = chartCache.setCacheControl(expires) source_id = "djshop.de" source_name = "djShop.de" description = "Updated daily with what's currently hot on the electronic scene." have_extra = True details = DetailItem( Detail(id=source_id, description=description, name=source_name, have_extra=have_extra)) def __init__(self, name=None, **kwargs): super(DjShopSpider, self).__init__() chartCache.shoveDetails(self.details) self.get_chart_urls() def get_chart_urls(self): for chart in self.baseCharts: req = urllib2.Request(chart) hxs = HtmlXPathSelector(text=urllib2.urlopen(req).read()) try: navBox = hxs.select('//div[@id="leftColumn"]') navList = navBox.select('//ul[@class="navUL"]/li') for index, link in enumerate(navList): if not "Label Charts" in link.select( 'a/text()').extract()[0].strip(): self.start_urls.append( "http://www.djshop.de" + link.select('a/@href').extract()[0].strip()) except Exception, e: print e
class Chart(object): # This is backward compatible, Types must be singular __types = {'album' : 'Album','track' : 'Track','artist' : 'Artist'} __outputMsgError = "Empty chart, not storing:" __outputMsgOk = "Saving chart:" def __init__(self): try: self.prettyName except AttributeError: self.prettyName = None self.__setStorage() self.details = DetailItem(Detail( id = self.source_id, name = self.prettyName, description = self.description, have_extra = self.have_extra ) ); chartCache.shoveDetails(self.details, self.is_chart) self.have_extra = self.have_extra #Chart specific optional attributes self.geo = None self.extra = None self.genre = None self.source_id = self.details.get('id') self.cache_id = "%scacheControl" % self.source_id self.default = 0 self.init() self.parse() def __setStorage(self): try: if self.is_chart: self.storage = chartCache.storage else: self.storage = chartCache.newreleases except AttributeError: self.is_chart = True self.storage = chartCache.storage def __getCacheControl(self): self.cacheControl = chartCache.setCacheControl(self.expires) def __createChartItem(self): try: chart = ChartItem( id = slugify(self.chart_id), name = self.chart_name, display_name = self.display_name, origin = self.origin, type = self.chart_type, default = self.default, source = self.source_id, date = self.cacheControl.get("Date-Modified"), expires = self.cacheControl.get("Date-Expires"), maxage = self.cacheControl.get("Max-Age"), list = self.chart_list ) except AttributeError: print "ChartItem is missing required attributes!" 
raise if self.have_extra : if self.geo is not None: chart['geo'] = self.geo if self.genre is not None: chart['genre'] = self.genre if self.extra is not None: chart['extra'] = self.extra return chart def __updateCache(self, metadata, chart): data = self.storage.get(self.source_id, {}) data[self.chart_id] = metadata self.storage[self.source_id] = data self.storage[self.source_id+self.chart_id] = dict(chart) self.storage[self.cache_id] = dict(self.cacheControl) def __createMetadata(self, chart): # metadata is the chart item minus the actual list plus a size metadata_keys = filter(lambda k: k != 'list', chart) metadata = { key: chart[key] for key in metadata_keys } metadata['size'] = len(self.chart_list) return metadata def init(self): raise NotImplementedError( "Scraper needs to implement this!") def parse(self): raise NotImplementedError( "Scraper needs to implement this!") def setChartId(self, id): self.chart_id = id def setChartName(self, name): self.chart_name = name.title() def setChartOrigin(self, origin): self.origin = origin def setChartDisplayName(self, name): self.display_name = name def setIsDefault(self, default): self.default = default def setChartGenre(self, genre): self.genre = genre def setChartExtra(self, extra): self.extra = extra def setChartGeo(self, geo): self.geo = geo; def setChartType(self, type): self.chart_type = self.__types.get(type.lower()) def setExpiresInDays(self, day, hour = 1): self.expires = chartCache.timedeltaUntilDays(day, hour) self.__getCacheControl() def setExpiresOnDay(self, day, hour = 1): self.expires = chartCache.timedeltaUntilWeekday(day, hour) self.__getCacheControl() def getJsonContent(self, url): try: request = urllib2.Request(url) response = urllib2.urlopen(request) content = response.read().decode('utf-8') return json.loads(content) except Exception,e: print "Error: %s" % e return {}
class BillboardSpider(CrawlSpider):
    """Scrapes the weekly Billboard charts, following pagination per chart."""
    name = "billboard.com"
    allowed_domains = ["billboard.com"]
    start_urls = [
        # this is the list of all the charts
        "http://www.billboard.com/charts"
    ]
    # xpath to retrieve the urls to specific charts
    chart_xpath = '//span[@class="field-content"]/a'
    # the xpath to the pagination links
    next_page_xpath = '//div[@class="chart_pager_bottom"]/div/ul/li[@class="pager-item"]/a/@href'
    # we only need one rule, and that is to follow
    # the links from the charts list page
    rules = [
        Rule(SgmlLinkExtractor(allow=['/charts/\w+'],
                               restrict_xpaths=chart_xpath),
             callback='parse_chart', follow=True)
    ]
    expires = chartCache.timedeltaUntilWeekday(EXPIRES_DAY, EXPIRES_HOUR)
    cacheControl = chartCache.setCacheControl(expires)
    source_id = "billboard"
    description = "The week's top-selling and most played albums and tracks across all genres, ranked by sales data and radio airtime as compiled by Nielsen."
    details = DetailItem(Detail(
        id=source_id,
        description=description,
    ))

    def __init__(self, name=None, **kwargs):
        super(BillboardSpider, self).__init__()
        chartCache.shoveDetails(self.details)

    def parse_chart(self, response):
        """Parse one chart's first page and chain through its pagination."""
        hxs = HtmlXPathSelector(response)
        chart_name = hxs.select(
            '//h1[@id="page-title"]/text()').extract()[0].strip()
        #chart_type = hxs.select('//*[@id="chart-list"]/div[@id="chart-type-fb"]/text()').extract()[0].strip()
        # get a list of pages; drop javascript links and queue the rest
        next_pages = hxs.select(self.next_page_xpath).extract()
        next_pages = deque(filter(lambda e: 'javascript' not in e, next_pages))
        # Correct the grammar to fit our expectations
        if chart_name == 'Germany Songs':
            chart_name = 'German Tracks'
        chart = ChartItem()
        chart['name'] = chart_name
        chart['display_name'] = chart_name if chart_name else "Top Overall"
        chart['origin'] = response.url
        chart['source'] = 'billboard'
        chart['id'] = slugify(chart_name)
        chart['list'] = []
        chart['date'] = self.cacheControl.get("Date-Modified")
        chart['expires'] = self.cacheControl.get("Date-Expires")
        chart['maxage'] = self.cacheControl.get("Max-Age")
        # lets figure out the content type from keywords in the chart name
        lower_name = chart_name.lower()
        if 'songs' in lower_name:
            chart['type'] = 'Track'
            typeItem = SingleTrackItem()
        elif 'albums' in lower_name or \
                any(s in lower_name for s in
                    ['soundtracks', 'billboard 200', 'tastemakers']):
            # BUGFIX: containment test was reversed (lower_name in s), which
            # only matched when the chart name equalled the keyword exactly.
            chart['type'] = 'Album'
            typeItem = SingleAlbumItem()
        elif any(s in lower_name for s in ['social 50', 'uncharted']):
            chart['type'] = 'Artist'
            typeItem = SingleArtistItem()
        else:
            chart['type'] = 'Track'
            typeItem = SingleTrackItem()
        if (chart['id'] == settings["BILLBOARD_DEFAULT_ALBUMCHART"] or
                chart['id'] == settings["BILLBOARD_DEFAULT_TRACKCHART"]):
            chart['default'] = 1
        chart = self.parse_items(hxs, chart, typeItem)
        # ok, we've prepped the chart container, lets start getting the pages
        if len(next_pages) > 0:
            next_page = next_pages.popleft()
            request = Request('http://www.billboard.com' + next_page,
                              callback=lambda r: self.parse_page(
                                  r, chart, next_pages, typeItem))
            yield request
        else:
            # BUGFIX: a chart without pagination was previously never
            # yielded (charts were only emitted from parse_page).
            yield chart

    def parse_items(self, hxs, chart, typeItem):
        """Append every chart entry on this page to chart['list']."""
        chart_list = []
        for item in hxs.select(
                '//div[contains(@class,"chart_listing")]/article'):
            loader = XPathItemLoader(typeItem, selector=item)
            loader.add_xpath(
                'rank',
                'header/span[contains(@class, "chart_position")]/text()')
            # ptitle yields the title for the type, so just set the title to
            # whatever the charttype is.
            if 'artist' in chart['type'].lower():
                loader.add_xpath('artist',
                                 'header/p[@class="chart_info"]/a/text()')
            else:
                loader.add_xpath(chart['type'].lower(), 'header/h1/text()')
                loader.add_xpath('artist',
                                 'header/p[@class="chart_info"]/a/text()')
                loader.add_xpath('album',
                                 'header/p[@class="chart_info"]/text()')
            single = loader.load_item()
            chart_list.append(dict(single))
        chart['list'] += chart_list
        return chart

    def parse_page(self, response, chart, next_pages, typeItem):
        """Parse a follow-up page; yield the chart when pages are exhausted."""
        hxs = HtmlXPathSelector(response)
        chart = self.parse_items(hxs, chart, typeItem)
        if len(next_pages) == 0:
            log.msg("Done with %s" % (chart['name']))
            yield chart
        else:
            next_page = next_pages.popleft()
            log.msg("Starting nextpage (%s) of %s - %s left" %
                    (next_page, chart['name'], len(next_pages)))
            request = Request('http://www.billboard.com' + next_page,
                              callback=lambda r: self.parse_page(
                                  r, chart, next_pages, typeItem))
            yield request
class Chart(object): # This is backward compatible, Types must be singular __types = {'album': 'Album', 'track': 'Track', 'artist': 'Artist'} __outputMsgError = "Empty chart, not storing:" __outputMsgOk = "Saving chart:" def __init__(self): try: self.prettyName except AttributeError: self.prettyName = None self.__setStorage() self.details = DetailItem( Detail(id=self.source_id, name=self.prettyName, description=self.description, have_extra=self.have_extra)) chartCache.shoveDetails(self.details, self.is_chart) self.have_extra = self.have_extra #Chart specific optional attributes self.geo = None self.extra = None self.genre = None self.source_id = self.details.get('id') self.cache_id = "%scacheControl" % self.source_id self.default = 0 self.init() self.parse() def __setStorage(self): try: if self.is_chart: self.storage = chartCache.storage else: self.storage = chartCache.newreleases except AttributeError: self.is_chart = True self.storage = chartCache.storage def __getCacheControl(self): self.cacheControl = chartCache.setCacheControl(self.expires) def __createChartItem(self): try: chart = ChartItem(id=slugify(self.chart_id), name=self.chart_name, display_name=self.display_name, origin=self.origin, type=self.chart_type, default=self.default, source=self.source_id, date=self.cacheControl.get("Date-Modified"), expires=self.cacheControl.get("Date-Expires"), maxage=self.cacheControl.get("Max-Age"), list=self.chart_list) except AttributeError: print "ChartItem is missing required attributes!" 
raise if self.have_extra: if self.geo is not None: chart['geo'] = self.geo if self.genre is not None: chart['genre'] = self.genre if self.extra is not None: chart['extra'] = self.extra return chart def __updateCache(self, metadata, chart): data = self.storage.get(self.source_id, {}) data[self.chart_id] = metadata self.storage[self.source_id] = data self.storage[self.source_id + self.chart_id] = dict(chart) self.storage[self.cache_id] = dict(self.cacheControl) def __createMetadata(self, chart): # metadata is the chart item minus the actual list plus a size metadata_keys = filter(lambda k: k != 'list', chart) metadata = {key: chart[key] for key in metadata_keys} metadata['size'] = len(self.chart_list) return metadata def init(self): raise NotImplementedError("Scraper needs to implement this!") def parse(self): raise NotImplementedError("Scraper needs to implement this!") def setChartId(self, id): self.chart_id = id def setChartName(self, name): self.chart_name = name.title() def setChartOrigin(self, origin): self.origin = origin def setChartDisplayName(self, name): self.display_name = name def setIsDefault(self, default): self.default = default def setChartGenre(self, genre): self.genre = genre def setChartExtra(self, extra): self.extra = extra def setChartGeo(self, geo): self.geo = geo def setChartType(self, type): self.chart_type = self.__types.get(type.lower()) def setExpiresInDays(self, day, hour=1): self.expires = chartCache.timedeltaUntilDays(day, hour) self.__getCacheControl() def setExpiresOnDay(self, day, hour=1): self.expires = chartCache.timedeltaUntilWeekday(day, hour) self.__getCacheControl() def getJsonContent(self, url): try: request = urllib2.Request(url) response = urllib2.urlopen(request) content = response.read().decode('utf-8') return json.loads(content) except Exception, e: print "Error: %s" % e return {}
class HNHHSpider(CrawlSpider):
    """Scrapes the HotNewHipHop Top 100 song and mixtape charts."""
    name = "hotnewhiphop.com"
    allowed_domains = ["hotnewhiphop.com"]
    start_urls = [
        # this is the list of all the charts
        "http://www.hotnewhiphop.com/top100/song/mainstream/month/1/",
        "http://www.hotnewhiphop.com/top100/song/mainstream/alltime/1",
        "http://www.hotnewhiphop.com/top100/song/upcoming/month/1",
        "http://www.hotnewhiphop.com/top100/song/upcoming/alltime/1",
        "http://www.hotnewhiphop.com/top100/mixtape/upcoming/month/1",
        "http://www.hotnewhiphop.com/top100/mixtape/upcoming/alltime/1",
        "http://www.hotnewhiphop.com/top100/mixtape/mainstream/month/1",
        "http://www.hotnewhiphop.com/top100/mixtape/mainstream/alltime/1",
    ]
    source_id = "hotnewhiphop"
    source_name = "HotNewHipHop"
    description = "Real hip-hop fans collaborates to create HotNewHiphops's daily updated charts."
    have_extra = True
    details = DetailItem(
        Detail(id=source_id,
               description=description,
               name=source_name,
               have_extra=have_extra))

    def __init__(self, name=None, **kwargs):
        super(HNHHSpider, self).__init__()
        chartCache.shoveDetails(self.details)

    def parse(self, response):
        """Build one ChartItem from a chart page; chart kind and 'extra'
        label are derived from keywords in the url."""
        hxs = HtmlXPathSelector(response)
        chart_name = "Top 100"
        try:
            chart_type = hxs.select(
                '//*[@class="tab-right-active"]/text()').extract()[0].strip()
        except IndexError:
            chart_type = hxs.select(
                '//*[@class="tab-left-active"]/text()').extract()[0].strip()
        # ROBUSTNESS: default extra so it is always bound even for a url
        # matching neither keyword (all current start_urls match one).
        extra = ""
        if "upcoming" in response.url:
            extra = "Upcoming"
        if "mainstream" in response.url:
            extra = "Mainstream"
        if "alltime" in response.url:
            # alltime charts fold the period label into the name instead
            chart_name += " " + extra
            extra = "Alltime"
        id = chart_name + extra + chart_type
        chart = ChartItem()
        chart['name'] = chart_name + " " + chart_type
        chart[
            'display_name'] = chart["name"] if chart["name"] else "Top Overall"
        chart['origin'] = response.url
        chart['source'] = 'hotnewhiphop'
        chart['id'] = slugify(id)
        chart['list'] = []
        chart['extra'] = extra
        expires = chartCache.timedeltaUntilDays(1)
        cacheControl = chartCache.setCacheControl(expires)
        chart['date'] = cacheControl.get("Date-Modified")
        chart['expires'] = cacheControl.get("Date-Expires")
        chart['maxage'] = cacheControl.get("Max-Age")
        # BUGFIX: removed dead "loader = SingleUrl*Item()" assignments here;
        # the loader is always rebuilt per item inside the loop below.
        if "mixtape" in response.url:
            if extra == "Upcoming":
                chart['default'] = 1
            chart['type'] = "Album"
            urlKey = "url"
            url = "http://www.hotnewhiphop.com/ajax/api/getMixtape/"
        elif "song" in response.url:
            chart['type'] = "Track"
            # Later on, if we have a hnhh resolver, this url could be used to
            # get a valid mp3 stream.
            url = "hnhh://www.hotnewhiphop.com/ajax/api/getSong/"
            urlKey = "stream_url"
        else:
            log.msg("Error with %s" % (chart['name']))
            return
        chart_list = []
        rank = 0
        for item in hxs.select('//div[@class="newCell newCell2"]'):
            if chart['type'] == "Album":
                loader = XPathItemLoader(SingleUrlAlbumItem(), selector=item)
            if chart['type'] == "Track":
                loader = XPathItemLoader(SingleUrlTrackItem(), selector=item)
            loader.add_xpath(chart['type'].lower(),
                             'div[@class="centerBlock"]/h3/a/text()')
            loader.add_xpath('artist',
                             'div[@class="centerBlock"]/a/i/text()')
            loader.add_xpath(urlKey, 'div[@class="centerBlock"]/a/@href')
            single = loader.load_item()
            # Rewrite the scraped link into an API url keyed by the item id.
            single[urlKey] = url + urlparse(single[urlKey]).path.split(".")[1]
            rank += 1
            single['rank'] = rank
            chart_list.append(dict(single))
        log.msg("Done with %s" % (chart['name']))
        chart['list'] += chart_list
        return chart
class MetacriticSpider(CrawlSpider):
    """Scrapes critically acclaimed albums from metacritic.com.

    NOTE(review): the rules reference 'parse_page' and 'parse_new_releases'
    callbacks that are defined further down this class (not shown here).
    """
    name = "metacritic.com"
    allowed_domains = ["metacritic.com"]
    baseUrl = "http://www.metacritic.com"
    # xpaths used to navigate the genre menu, tab bar and result lists
    genre_nav_xpath = './/ul[@class="genre_nav"]/li'
    types_xpath = './/ul[contains(@class, "tabs")]/li'
    first_nav_xpath = './/ul[contains(@class, "nav_items")]/li'
    current_page_name_xpath = './/ul[contains(@class, "tabs")]/li/span[@class="active"]/span/text()'
    list_xpath = './/ol[contains(@class,"list_product_condensed")]/li'
    next_page_xpath = './/ul[@class="pages"]/li/a/@href'
    coming_soon_table_xpath = './/table[@class="musicTable"]/tr'
    coming_soon_artist_xpath = './/td[@class="artistName"]'
    coming_soon_album_xpath = './/td[@class="albumTitle"]/text()'
    start_urls = ["http://www.metacritic.com/music"]
    rules = [
        # genre chart pages
        Rule(SgmlLinkExtractor(allow=("albums/genre/\w+", ),
                               deny=(
                                   "music",
                                   "name",
                               ),
                               restrict_xpaths=(genre_nav_xpath, )),
             callback='parse_page',
             follow=True),
        # tab links that are not genre/sort variants
        Rule(SgmlLinkExtractor(
            deny=("albums/genre/\w+", "name", "music",
                  "coming-soon/(metascore|userscore|name|date)",
                  "new-releases/name"),
            restrict_xpaths=(types_xpath, )),
             callback='parse_new_releases',
             follow=True),
        # top-nav release-date / score listings
        Rule(SgmlLinkExtractor(allow=(
            "albums/release-date",
            "albums/score",
        ),
                               deny=(
                                   "feature",
                                   "artist",
                                   "/\w+/people",
                               ),
                               restrict_xpaths=(first_nav_xpath, )),
             callback='parse_new_releases',
             follow=True)
    ]
    # Expires in 1 day (the previous comment said 2 days but the code
    # passes 1 to timedeltaUntilDays)
    expires = chartCache.timedeltaUntilDays(1)
    cacheControl = chartCache.setCacheControl(expires)
    source_id = "metacritic"
    source_name = "Metacritic"
    description = "Critically acclaimed and noteworthy music."
    have_extra = True
    details = DetailItem(
        Detail(id=source_id,
               description=description,
               name=source_name,
               have_extra=have_extra))

    def __init__(self, name=None, **kwargs):
        super(MetacriticSpider, self).__init__()
        # registered in both stores (second call passes is_chart=False)
        chartCache.shoveDetails(self.details)
        chartCache.shoveDetails(self.details, False)

    def get_current_genre(self, hxs):
        """Return the genre-menu entry marked with a <span>, or None.

        NOTE(review): looks like the <span> marks the active entry — confirm
        against the live page markup.
        """
        navList = hxs.select(self.genre_nav_xpath)
        for index, item in enumerate(navList):
            if item.select('.//span'):
                return item.select('.//span/text()').extract()[0].strip()
        return None

    def get_current(self, hxs, chart):
        """Fill chart extra/name/display_name/id from the active tab labels;
        falls back to fixed labels on coming-soon pages where the xpath
        yields fewer than two entries."""
        try:
            active = hxs.select(self.current_page_name_xpath).extract()
            chart["extra"] = active[0].strip()
            chart["name"] = active[1].strip()
            chart["display_name"] = chart["name"]
            chart["id"] = slugify(chart["name"] + chart["extra"])
        except Exception, e:
            # broad catch: any scrape failure on a coming-soon page gets the
            # hard-coded labels; other pages are left untouched
            if "coming-soon" in chart["origin"]:
                chart["extra"] = "Coming Soon"
                chart["name"] = "By Date"
                chart["display_name"] = chart["name"]
                chart["id"] = slugify(chart["name"] + chart["extra"])
class ItunesSpider(BaseSpider): name = 'itunes.com' allowed_domains = ['itunes.com'] start_urls = get_feed_urls(settings['ITUNES_LIMIT']) source_id = "itunes" source_name = "iTunes" description = "Updated daily, browse what currently hot on Itunes. Includes albums, tracks by genre." have_extra = True details = DetailItem(Detail( id = source_id, description = description, name = source_name, have_extra = have_extra ) ); def __init__(self, name=None, **kwargs): super(ItunesSpider, self).__init__() chartCache.shoveDetails(self.details) def parse(self, response): try: feed = etree.fromstring(response.body) except etree.XMLSyntaxError: log.msg("Parse error, skipping: %s"%(response.url), loglevel=log.WARNING) return None if feed.tag == '{http://www.w3.org/2005/Atom}feed': return self.parse_atom(feed) elif feed.tag == 'rss': return self.parse_rss(feed, response.url) # Itunes is weird, but that we know. There's sometimes no information about this feed # in the response, so we need to construct Title and so forth # Also, its sometimes b0rked description filled with bs json def parse_rss(self, feed, url): genre_name = None feed_extra = None feed_type = "Album" geo = None genre = filter(lambda k: 'genre' in k, urlparser(url).path.split("/")) try : genre_name = get_genre( genre[0].split("=")[1] ) # geo in xpath is different ISO than in url. 
We want cc not xpath # geo = feed.xpath('.//channel/language')[0].text geo_re = re.compile("cc=(.*)(?=\/)") rGeo = geo_re.search(url) if rGeo != None: geo = rGeo.groups()[0] except IndexError : return if 'newreleases' in url : feed_extra = "New Album Releases" if 'justadded' in url : feed_extra = "Just Added Albums" if 'featuredalbums' in url: feed_extra = "Featured Albums" if feed_extra is None or genre_name is None or geo is None : return ns = { 'itms': 'http://phobos.apple.com/rss/1.0/modules/itms/' } entries = feed.xpath('.//channel/item') rank = 0 chart_list = [] for entry in entries: artist = entry.xpath('itms:artist', namespaces=ns)[0].text album = entry.xpath('itms:album', namespaces=ns)[0].text rank += 1 item = SingleAlbumItem() item['artist'] = artist item['album'] = album item['rank'] = rank chart_list.append( dict(item) ) chart = ChartItem() # Unique ids _id = url md5 = hashlib.md5() md5.update(_id) _id = md5.hexdigest() chart['id'] = _id chart['origin'] = url chart['genre'] = genre_name chart['geo'] = geo.lower() chart['name'] = genre_name chart['extra'] = feed_extra chart["newrls"] = True chart['type'] = feed_type chart['list'] = chart_list chart['source'] = 'itunes' # maxage is the last item scraped # Expires in 1 days expires = chartCache.timedeltaUntilDays(1) cacheControl = chartCache.setCacheControl(expires) chart['date'] = cacheControl.get("Date-Modified") chart['expires'] = cacheControl.get("Date-Expires") chart['maxage'] = cacheControl.get("Max-Age") if _id == settings["ITUNES_DEFAULT_NRCHART"]: chart['default'] = 1 return chart def parse_atom(self, feed): ns = {'ns': 'http://www.w3.org/2005/Atom', 'im': 'http://itunes.apple.com/rss'} try: _id = feed.xpath('/ns:feed/ns:id', namespaces=ns)[0].text _type = feed.xpath('/ns:feed/ns:entry/im:contentType/im:contentType', namespaces=ns)[0].attrib['term'] except IndexError: return if _type != "Album" and _type != "Track": return # skip playlists entries = feed.xpath('/ns:feed/ns:entry', namespaces=ns) 
chart_list = [] rank = 0 for entry in entries: title = entry.xpath('im:name', namespaces=ns)[0].text artist = entry.xpath('im:artist', namespaces=ns)[0].text if _type == "Album": album = title item = SingleAlbumItem() elif _type == "Track": album = entry.xpath('im:collection/im:name', namespaces=ns)[0].text item = SingleTrackItem() item['track'] = title rank += 1 item['artist'] = artist item['album'] = album item['rank'] = rank chart_list.append( dict(item) ) title = feed.xpath('ns:title', namespaces=ns)[0].text geo = None geo_re = re.compile("cc=([a-zA-Z]+)") rGeo = geo_re.search(_id) if rGeo != None: geo = rGeo.groups()[0] genre = None genre_re = re.compile("genre=(\d+)/") rGenre = genre_re.search(_id) if rGenre != None: genre = rGenre.groups()[0] if not genre is None: genre = get_genre(genre) origin = _id md5 = hashlib.md5() md5.update(_id) _id = md5.hexdigest() if geo is None: geo_s = origin.split("/") geo = geo_s chart = ChartItem() # Itunes expires tomorrow at 00am chart['id'] = _id chart['display_name'] = genre if genre else "Top Overall" chart['origin'] = origin chart['genre'] = genre chart['geo'] = geo chart['name'] = title chart['type'] = _type chart['list'] = chart_list chart['source'] = 'itunes' # maxage is the last item scraped expires = chartCache.timedeltaUntilDays(1) cacheControl = chartCache.setCacheControl(expires) chart['date'] = cacheControl.get("Date-Modified") chart['expires'] = cacheControl.get("Date-Expires") chart['maxage'] = cacheControl.get("Max-Age") if(_id == settings["ITUNES_DEFAULT_ALBUMCHART"] or _id == settings["ITUNES_DEFAULT_TRACKCHART"]): print "Found default" + _id chart['default'] = 1 return chart