Example #1
0
    def do_parse(self, chart, response):
        """Fill ``chart`` from a JSON body that carries a ``tracks`` array.

        Every element of ``tracks`` contributes one list entry made of its
        ``artist`` and ``title`` values plus its zero-based position in the
        array as the rank.

        Returns the result of ``self.do_process_item(chart)``.
        """
        payload = json.loads(response.body_as_unicode())

        for position, track in enumerate(payload['tracks']):
            loader = TomahawkItemLoader()
            loader.add_value("artist", track['artist'])
            loader.add_value("track", track['title'])
            loader.add_value("rank", position)
            chart.add_value("list", loader.load_item())

        return self.do_process_item(chart)
 def do_parse(self, chart, response):
     """Fill ``chart`` from a JSON body, keeping at most the first 100 rows.

     Each row is handed to ``self.parse_metadata`` together with the list
     of separator strings; the returned mapping must provide ``track`` and
     ``artist``. The zero-based row index is stored as the rank.

     Returns the result of ``self.do_process_item(chart)``.
     """
     separators = [" - ", " -", ": ", ":", "\u2014"]
     rows = json.loads(response.body_as_unicode())

     for position, row in enumerate(rows):
         # Stop as soon as the 100-entry cap is reached.
         if not position < 100:
             break

         parsed = self.parse_metadata(row, separators)
         loader = TomahawkItemLoader()
         loader.add_value("track", parsed.pop('track'))
         loader.add_value("artist", parsed.pop("artist"))
         loader.add_value("rank", position)
         chart.add_value("list", loader.load_item())

     return self.do_process_item(chart)
Example #3
0
    def do_parse(self, chart, response):
        """Fill ``chart`` with album entries from a JSON search response.

        The body is decoded as JSON. If
        ``searchResponse.controlSet.status`` (whitespace-stripped) is not
        ``'ok'``, nothing is added and ``None`` is returned. Otherwise each
        item yielded by ``self.get_list_from_result`` becomes one list
        entry with its ``artist``, its ``title`` (stored as ``album``) and
        its zero-based position as the rank.

        Returns the result of ``self.do_process_item(chart)``, or ``None``
        when the status check fails.
        """
        j = json.loads(response.body_as_unicode())
        status = j['searchResponse']['controlSet']['status'].strip()
        if status != 'ok':
            return None

        # enumerate() yields (index, value). The result item needs its own
        # name so the loader created below does not hide it.
        # assumes each result item is a mapping with 'artist' and 'title'
        # keys -- TODO confirm against get_list_from_result
        for rank, item in enumerate(self.get_list_from_result(j)):
            entry = TomahawkItemLoader()
            entry.add_value("artist", item['artist'])
            entry.add_value("album", item['title'])
            entry.add_value("rank", rank)
            chart.add_value("list", entry.load_item())

        return self.do_process_item(chart)
    def do_parse(self, chart, response):
        """Fill ``chart`` from an HTML list page.

        The rows are ``.row > li`` elements when ``response.url`` is one of
        ``self.boxed_urls`` and ``ul.m0 > li`` elements otherwise. For an
        artist chart only the title link text is collected; for any other
        chart type the item name and the artist sub-name are collected.
        The zero-based row index is stored as the rank.

        Returns the result of ``self.do_process_item(chart)``.
        """
        selector = Selector(response)
        # Named item_type (not ``type``) so the builtin is not shadowed.
        item_type = self.type_from_name(self.do_get_type(chart))

        if response.url in self.boxed_urls:
            item_selector = selector.xpath('.//*[@class="row"]/li')
        else:
            item_selector = selector.xpath('.//ul[@class="m0"]/li')

        # Compare by value: identity of two equal strings is an
        # implementation detail and must not decide which XPath is used.
        is_artist_chart = item_type == self.ArtistType
        field_name = item_type.lower()

        for rank, item in enumerate(item_selector):
            entry = TomahawkItemLoader(selector=item)

            if is_artist_chart:
                entry.add_xpath(field_name, './/span[@class="list-item-title"]/a/text()')
            else:
                entry.add_xpath(field_name, './/em[@class="list-item-name"]/text()')
                entry.add_xpath("artist", './/em[@class="list-item-subname"]/strong/text()')

            entry.add_value("rank", rank)
            chart.add_value("list", entry.load_item())

        return self.do_process_item(chart)
Example #5
0
    def do_parse(self, chart, response):
        """Fill ``chart`` from a JSON body that carries a ``result`` array.

        The chart type returned by ``self.do_get_type`` is used as the
        field name for each record's ``name``. Unless that type equals the
        lower-cased ``TomahawkSpiderHelper.ArtistType``, the record's
        ``artist`` is stored as well. The zero-based index is the rank.

        Returns the result of ``self.do_process_item(chart)``.
        """
        payload = json.loads(response.body)
        kind = self.do_get_type(chart)

        for position, record in enumerate(payload['result']):
            loader = TomahawkItemLoader()
            loader.add_value(kind, record.pop('name'))

            is_artist_chart = kind == TomahawkSpiderHelper.ArtistType.lower()
            if not is_artist_chart:
                # Tracks and albums carry a separate artist value.
                loader.add_value("artist", record.pop("artist"))

            loader.add_value("rank", position)
            chart.add_value("list", loader.load_item())

        return self.do_process_item(chart)
    def do_parse(self, chart, response):
        """Add one page of album rows to ``chart`` and decide whether to stop.

        Every node matched by ``self.list_xpath`` becomes a list entry with
        its zero-based index on this page as rank, plus ``artist`` and
        ``album`` text. The chart is processed (and the result returned)
        when there is no node matching ``self.next_page_xpath`` or when the
        depth check below passes; otherwise ``None`` is returned so parsing
        can continue with further pages.
        """
        page = Selector(response)

        for position, row in enumerate(page.xpath(self.list_xpath)):
            loader = TomahawkItemLoader(selector=row)
            loader.add_value("rank", position)
            loader.add_xpath("artist", './/span[@class="data"]/text()')
            loader.add_xpath("album", './/a/text()')
            chart.add_value("list", loader.load_item())

        # Without a next-page link this was the last page: emit the chart.
        if not page.xpath(self.next_page_xpath):
            self.log("No more next page! Processing")
            return self.do_process_item(chart)

        # NOTE(review): [-1:] keeps only the last element/character of what
        # extract() returns. If that is a string, a single digit can never
        # exceed 9, so this cut-off would be unreachable -- confirm what
        # extract() returns and what the href looks like.
        tail = extract(self.next_page_xpath + "/@href", page)[-1:]
        if not (tail and int(tail) > 9):
            return None

        self.log("Maximum depth! Processing")
        return self.do_process_item(chart)
    def do_parse(self, chart, response):
        """Add one page of chart rows to ``chart`` and decide whether to stop.

        Rows are the ``article`` elements whose class contains
        ``chart-row`` inside the ``chart-data`` container. The rank is read
        from the page itself. The row title (``h2`` or the web-only track
        link) is stored under the chart type returned by
        ``self.do_get_type``. Unless that type equals ``'Artist'``, the
        ``h3`` text (plain or inside a link) is stored as ``artist``.

        Returns the result of ``self.do_process_item(chart)`` when no node
        matches ``self.next_page_xpath``; otherwise ``None`` so parsing can
        continue with further pages.
        """
        selector = Selector(response)
        items_xpath = '//div[contains(@class,"chart-data")]/div[@class="container"]/article[contains(@class,"chart-row")]'
        items = selector.xpath(items_xpath)

        item_type = self.do_get_type(chart)
        # Compare by value: ``is not`` against a string literal tests object
        # identity, which is implementation-dependent for equal strings.
        needs_artist = item_type != 'Artist'

        for item in items:
            entry = TomahawkItemLoader(selector=item)
            entry.add_xpath("rank", './/div[@class="chart-row__primary"]/div[@class="chart-row__rank"]/span/text()')

            # H2 is Artist or Song
            entry.add_xpath(item_type, './/div[@class="chart-row__primary"]/div[@class="chart-row__title"]/h2/text()')
            entry.add_xpath(item_type, './/div[@class="track"]/span[@class="track-info"]/span[@class="web-only"]/a/text()')

            if needs_artist:
                # The artist is either plain h3 text or wrapped in a link.
                entry.add_xpath("artist", './/div[@class="chart-row__primary"]/div[@class="chart-row__title"]/h3/text()')
                entry.add_xpath("artist", './/div[@class="chart-row__primary"]/div[@class="chart-row__title"]/h3/a/text()')

            chart.add_value("list", entry.load_item())

        # process the item if there is no more next_pages, otherwise, return none and keep parsing
        if not selector.xpath(self.next_page_xpath):
            return self.do_process_item(chart)
        return None
Example #8
0
    def do_parse(self, chart, response):
        """Fill ``chart`` from an iTunes feed in either of two XML layouts.

        Two node sets are looked up: ``//item`` elements (read with the
        ``itms`` namespace) and ``/feed/entry`` Atom elements (read with
        the ``im`` namespace). The ``//item`` set is used when it is
        non-empty, otherwise the Atom set. For Atom entries the ``im:name``
        text is stored under the chart type returned by
        ``self.do_get_type``; for ``item`` elements the ``itms:album`` text
        is stored as ``album``. Both layouts store an ``artist`` and the
        zero-based index as rank.

        Returns the result of ``self.do_process_item(chart)``.
        """
        selector = Selector(response=response)
        selector.register_namespace('itms', 'http://phobos.apple.com/rss/1.0/modules/itms/')
        selector.register_namespace('ns', 'http://www.w3.org/2005/Atom')
        selector.register_namespace('im', 'http://itunes.apple.com/rss')

        itms = selector.xpath("//item")
        im = selector.xpath("/ns:feed/ns:entry")

        items = itms or im
        # Derive the namespace from the node set actually iterated so the
        # XPaths always match the nodes they are applied to.
        ns = 'itms' if itms else 'im'
        item_type = self.do_get_type(chart)

        for rank, item in enumerate(items):
            entry = TomahawkItemLoader(selector=item)
            if ns == 'im':
                # Relative path (leading '.'): an absolute '//' would match
                # every im:name in the document, not just this entry's.
                entry.add_xpath(item_type, './/im:name/text()')
                entry.add_xpath('artist', './/im:artist/text()')
            else:
                entry.add_xpath('album', './/itms:album/text()')
                entry.add_xpath('artist', './/itms:artist/text()')
            entry.add_value('rank', rank)
            chart.add_value("list", entry.load_item())

        return self.do_process_item(chart)