def parse(self, response):
    """Yield one GithubItem (name, update_time) per public repository on the page.

    :param response: listing page response containing ``li.public`` entries.
    """
    for repository in response.css('li.public'):
        item = GithubItem({
            # Raw string for the regex: "\s" inside a plain literal is an
            # invalid escape (DeprecationWarning; SyntaxError in future Python).
            'name': repository.xpath(
                './/a[@itemprop="name codeRepository"]/text()'
            ).re_first(r'\n\s*(.*)'),
            'update_time': repository.xpath(
                './/relative-time/@datetime'
            ).extract_first(),
        })
        yield item
def parse(self, response):
    """Yield one GithubItem (name, update_time) per public repository on the page.

    :param response: listing page response containing ``li.public`` entries.
    """
    for github in response.css('li.public'):
        item = GithubItem({
            # default='' guards against a missing anchor, which would
            # otherwise make extract_first() return None and crash .strip().
            'name': github.css('div.mb-1 a::text').extract_first(default='').strip(),
            # The datetime attribute lives on <relative-time> itself; the
            # original 'relative-time ::attr(datetime)' (note the space)
            # selected attributes of its *descendants* and returned None.
            'update_time': github.css('relative-time::attr(datetime)').extract_first(),
        })
        yield item
def parse(self, response):
    """Yield one GithubItem (name, update_time) per repository card on the page.

    :param response: listing page response containing ``div.col-10`` entries.
    """
    for repository in response.css('div.col-10'):
        item = GithubItem({
            # .strip() (not .split()) so the item holds a clean string;
            # the original .split() stored a list of whitespace-separated
            # fragments instead of the repository name.
            'name': repository.css('h3 a::text').extract_first().strip(),
            # The ISO datetime contains no whitespace, so the original
            # .split() just wrapped it in a one-element list — drop it.
            'update_time': repository.css(
                'relative-time::attr(datetime)').extract_first(),
        })
        yield item
def parse(self, response):
    """Yield a follow-up Request per public repository, carrying a partially
    filled GithubItem in ``meta['item']`` for ``parse_repo`` to complete.

    :param response: listing page response containing ``li.public`` entries.
    """
    for repository in response.css('li.public'):
        item = GithubItem()
        # Raw string for the regex: "\s" inside a plain literal is an
        # invalid escape (DeprecationWarning; SyntaxError in future Python).
        item['name'] = repository.xpath(
            './/a[@itemprop="name codeRepository"]/text()').re_first(r'\n\s*(.*)')
        item['update_time'] = repository.xpath(
            './/relative-time/@datetime').extract_first()
        # First <a> in the card links to the repository page; urljoin
        # resolves the relative href against the current page.
        repo_url = response.urljoin(
            repository.xpath('.//a/@href').extract_first())
        request = scrapy.Request(repo_url, callback=self.parse_repo)
        request.meta['item'] = item
        yield request
def parse(self, response):
    """Yield one GithubItem (name, update_time) per repository list entry.

    :param response: listing page response with ``div#user-repositories-list``.
    """
    for every in response.css('div#user-repositories-list li'):
        item = GithubItem({
            # default='' guards against a missing anchor, which would
            # otherwise make extract_first() return None and crash .strip().
            'name': every.css('div[class="d-inline-block mb-1"] a::text')
                         .extract_first(default='').strip(),
            'update_time': every.css('relative-time::attr(datetime)').extract_first(),
        })
        yield item
def parse(self, response):
    """Yield a follow-up Request per repository list entry, carrying a
    partially filled GithubItem in ``meta['item']`` for ``sub_parse``.

    :param response: listing page response with ``div#user-repositories-list``.
    """
    for entry in response.css('div#user-repositories-list li'):
        name_text = entry.css(
            'div[class="d-inline-block mb-1"] a::text').extract_first()
        updated = entry.css('relative-time::attr(datetime)').extract_first()

        item = GithubItem()
        item['name'] = name_text.strip()
        item['update_time'] = updated

        # Resolve the relative repo link against the current page URL.
        href = entry.css(
            'div[class="d-inline-block mb-1"] a::attr(href)').extract_first()
        detail_url = response.urljoin(href)

        # meta= at construction time is equivalent to assigning
        # request.meta['item'] after the fact.
        yield scrapy.Request(detail_url, callback=self.sub_parse,
                             meta={'item': item})
def parse(self, response):
    """Yield a follow-up Request per public source repository, carrying a
    partially filled GithubItem in ``meta['item']`` for ``parse_more``.

    :param response: listing page response for the shiyanlou account.
    """
    for repos in response.css(
            'li[class="col-12 d-block width-full py-4 border-bottom public source"]'
    ):
        item = GithubItem()
        # The original line ended with a stray trailing comma, which made
        # item['name'] a 1-tuple (then unwrapped via item['name'][0] below).
        # Store the plain string instead; .strip() already removes '\n'.
        item['name'] = repos.css(
            'div[class="d-inline-block mb-1"] h3 a::text').extract_first().strip()
        item['update_time'] = repos.css(
            'relative-time::attr(datetime)').extract_first()
        # NOTE(review): account name is hard-coded here rather than taken
        # from the page href — confirm this spider only targets shiyanlou.
        repos_url = "https://github.com/shiyanlou/{}".format(item['name'])
        request = scrapy.Request(repos_url, callback=self.parse_more)
        request.meta['item'] = item
        yield request
def parse(self, response):
    """Yield a follow-up Request per public repository (item completed by
    ``parse_repo``), then follow the pagination link until 'Next' is disabled.

    :param response: listing page response containing ``li.public`` entries.
    """
    for repository in response.css('li.public'):
        item = GithubItem()
        item['name'] = repository.xpath(
            './/a[@itemprop="name codeRepository"]/text()').re_first(
                r'\n\s*(.*)')
        item['update_time'] = repository.xpath(
            './/relative-time/@datetime').extract_first()
        # First <a> in the card links to the repository page.
        repo_url = response.urljoin(
            repository.xpath('.//a/@href').extract_first())
        request = scrapy.Request(repo_url, callback=self.parse_repo)
        request.meta['item'] = item
        yield request

    # Pagination: a disabled 'Next' span marks the last page. Otherwise the
    # last anchor in the pagination block points to the next page.
    spans = response.css('div.pagination span.disabled::text')
    if not spans or spans[-1].extract() != 'Next':
        next_url = response.css(
            'div.pagination a:last-child::attr(href)').extract_first()
        # Guard: a page with no pagination block yields next_url == None,
        # and response.follow(None) raises ValueError.
        if next_url is not None:
            yield response.follow(next_url, callback=self.parse)