def get_subcategories(self, response):
    """Yield follow-up requests for the links found on a category page.

    The page's ``.action-bar > .action-bar-item`` nodes decide which links
    are followed:

    * more than one node: the submenu links found under
      ``get_extracted(action_items, 1)`` are requested with this method as
      the callback;
    * otherwise: the ``.cluster-heading > .title-link`` links are requested
      with ``self.get_apps`` as the callback.

    Every href is resolved against ``response.url`` before being requested.
    """
    action_items = response.css('.action-bar > .action-bar-item')

    if len(action_items) <= 1:
        # No submenu to descend into: follow the cluster headings instead.
        cluster_links = response.css(
            '.cluster-heading > .title-link::attr(href)')
        for href in cluster_links.extract():
            yield Request(urljoin(response.url, href),
                          callback=self.get_apps)
        return

    # get_extracted is defined elsewhere; presumably it picks the item at
    # index 1 of the selection -- confirm against the helper.
    submenu_owner = get_extracted(action_items, 1)
    submenu_links = submenu_owner.css(
        'ul.submenu-item-wrapper a::attr(href)')
    for href in submenu_links.extract():
        yield Request(urljoin(response.url, href),
                      callback=self.get_subcategories)
def get_subcategories(self, response):
    """Yield follow-up requests for the links found on a category page.

    The page's ``.action-bar > .action-bar-item`` nodes decide which links
    are followed:

    * more than one node: the submenu links found under
      ``get_extracted(action_items, 1)`` are requested with this method as
      the callback;
    * otherwise: the ``.cluster-heading > .title-link`` links are requested
      with ``self.get_apps`` as the callback.

    Every href is resolved against ``response.url`` before being requested.
    """
    action_items = response.css('.action-bar > .action-bar-item')

    if len(action_items) <= 1:
        # No submenu to descend into: follow the cluster headings instead.
        cluster_links = response.css(
            '.cluster-heading > .title-link::attr(href)')
        for href in cluster_links.extract():
            yield Request(urljoin(response.url, href),
                          callback=self.get_apps)
        return

    # get_extracted is defined elsewhere; presumably it picks the item at
    # index 1 of the selection -- confirm against the helper.
    submenu_owner = get_extracted(action_items, 1)
    submenu_links = submenu_owner.css(
        'ul.submenu-item-wrapper a::attr(href)')
    for href in submenu_links.extract():
        yield Request(urljoin(response.url, href),
                      callback=self.get_subcategories)
def parse(self, response):
    """Build an ``InstagramProfileItems`` from a profile page response.

    The text nodes selected by the ``sharedData`` script XPath are joined
    into one string, the value assigned to ``window._sharedData`` is cut
    out with a regular expression and decoded as JSON, and fields of the
    ``UserProfile`` entry are copied onto a new item, which is returned.

    Raises:
        ValueError: from ``json.loads`` when the page contains no
            ``window._sharedData = ...;`` assignment (the joined match
            list is then an empty string) or the captured text is not
            valid JSON.
        KeyError: when the decoded data lacks one of the keys read below.
    """
    script_nodes = response.xpath(
        '//script[contains(text(), "sharedData")]/text()')
    script_text = "".join(script_nodes.extract())
    payload = "".join(
        re.findall(r'window._sharedData = (.*);', script_text))
    shared = json.loads(payload)

    item = InstagramProfileItems()
    # get_extracted is defined elsewhere; presumably it unwraps the first
    # entry of the UserProfile value -- confirm against the helper.
    profile = get_extracted(shared["entry_data"]["UserProfile"])

    item["is_private"] = profile["relationship"]["is_private"]
    item["posts"] = profile["userMedia"]

    # These item fields share their name with the key under "user".
    user = profile["user"]
    for field in ("username", "bio", "website", "profile_picture",
                  "full_name"):
        item[field] = user[field]

    counts = user["counts"]
    item["total_posts"] = counts["media"]
    item["followers"] = counts["followed_by"]
    item["following"] = counts["follows"]
    return item
def parse(self, response):
    """Build an ``InstagramProfileItems`` from a profile page response.

    The text nodes selected by the ``sharedData`` script XPath are joined
    into one string, the value assigned to ``window._sharedData`` is cut
    out with a regular expression and decoded as JSON, and fields of the
    ``UserProfile`` entry are copied onto a new item, which is returned.

    Raises:
        ValueError: from ``json.loads`` when the page contains no
            ``window._sharedData = ...;`` assignment (the joined match
            list is then an empty string) or the captured text is not
            valid JSON.
        KeyError: when the decoded data lacks one of the keys read below.
    """
    script_nodes = response.xpath(
        '//script[contains(text(), "sharedData")]/text()')
    script_text = "".join(script_nodes.extract())
    payload = "".join(
        re.findall(r'window._sharedData = (.*);', script_text))
    shared = json.loads(payload)

    item = InstagramProfileItems()
    # get_extracted is defined elsewhere; presumably it unwraps the first
    # entry of the UserProfile value -- confirm against the helper.
    profile = get_extracted(shared["entry_data"]["UserProfile"])

    item["is_private"] = profile["relationship"]["is_private"]
    item["posts"] = profile["userMedia"]

    # These item fields share their name with the key under "user".
    user = profile["user"]
    for field in ("username", "bio", "website", "profile_picture",
                  "full_name"):
        item[field] = user[field]

    counts = user["counts"]
    item["total_posts"] = counts["media"]
    item["followers"] = counts["followed_by"]
    item["following"] = counts["follows"]
    return item