Beispiel #1
0
 def parse(self, response):
     """Yield a GithubItem (name, update_time) for each public repository.

     :param response: listing-page response containing ``li.public`` entries.
     """
     for repository in response.css('li.public'):
         item = GithubItem({
             # Raw string: in the original plain literal, "\s" was an
             # invalid escape sequence (SyntaxWarning on modern Python).
             # The regex itself is unchanged.
             'name': repository.xpath(
                 './/a[@itemprop="name codeRepository"]/text()'
             ).re_first(r'\n\s*(.*)'),
             'update_time': repository.xpath(
                 './/relative-time/@datetime').extract_first(),
         })
         yield item
Beispiel #2
0
 def parse(self, response):
     """Yield a GithubItem (name, update_time) for each public repository.

     :param response: listing-page response containing ``li.public`` entries.
     """
     for github in response.css('li.public'):
         item = GithubItem({
             # default='' guards against AttributeError when the selector
             # matches nothing (extract_first() would return None).
             'name': github.css(
                 'div.mb-1 a::text').extract_first(default='').strip(),
             'update_time': github.css(
                 'relative-time ::attr(datetime)').extract_first(),
         })
         yield item
Beispiel #3
0
 def parse(self, response):
     """Yield a GithubItem (name, update_time) for each repository row.

     :param response: listing-page response containing ``div.col-10`` entries.
     """
     for repository in response.css('div.col-10'):
         item = GithubItem({
             # BUG FIX: the original called .split(), which stored a *list*
             # of tokens; .strip() keeps 'name' a plain string.  default=''
             # also avoids AttributeError when the selector matches nothing.
             'name': repository.css(
                 'h3 a::text').extract_first(default='').strip(),
             # BUG FIX: .split() wrapped the ISO datetime in a one-element
             # list; store the string directly.
             'update_time': repository.css(
                 'relative-time::attr(datetime)').extract_first(),
         })
         yield item
Beispiel #4
0
 def parse(self, response):
     """For each public repository, yield a follow-up request to its page.

     The partially-filled item travels to ``parse_repo`` via request.meta.

     :param response: listing-page response containing ``li.public`` entries.
     """
     for repository in response.css('li.public'):
         item = GithubItem()
         # Raw string avoids the invalid "\s" escape warning; the regex
         # matched is identical to the original.
         item['name'] = repository.xpath(
             './/a[@itemprop="name codeRepository"]/text()').re_first(r'\n\s*(.*)')
         item['update_time'] = repository.xpath(
             './/relative-time/@datetime').extract_first()
         # First <a> in the row links to the repository page.
         repo_url = response.urljoin(
             repository.xpath('.//a/@href').extract_first())
         request = scrapy.Request(repo_url, callback=self.parse_repo)
         request.meta['item'] = item
         yield request
Beispiel #5
0
    def parse(self, response):
        """Yield a GithubItem (name, update_time) per repository list entry.

        :param response: page containing ``div#user-repositories-list li``.
        """
        for every in response.css('div#user-repositories-list li'):
            item = GithubItem({
                # default='' guards against AttributeError when the selector
                # matches nothing (extract_first() would return None).
                'name': every.css(
                    'div[class="d-inline-block mb-1"] a::text'
                ).extract_first(default='').strip(),
                'update_time': every.css(
                    'relative-time::attr(datetime)').extract_first(),
            })
            yield item
Beispiel #6
0
 def parse(self, response):
     """For each repository entry, yield a follow-up request to its page.

     The partially-filled item travels to ``sub_parse`` via request.meta.

     :param response: page containing ``div#user-repositories-list li``.
     """
     for every in response.css('div#user-repositories-list li'):
         item = GithubItem()
         # default='' guards against AttributeError when the selector
         # matches nothing (extract_first() would return None).
         item['name'] = every.css(
             'div[class="d-inline-block mb-1"] a::text'
         ).extract_first(default='').strip()
         item['update_time'] = every.css(
             'relative-time::attr(datetime)').extract_first()
         every_url = response.urljoin(
             every.css('div[class="d-inline-block mb-1"] a::attr(href)').
             extract_first())
         request = scrapy.Request(every_url, callback=self.sub_parse)
         request.meta['item'] = item
         yield request
Beispiel #7
0
 def parse(self, response):
     """For each public source repository, yield a request to its page.

     The partially-filled item travels to ``parse_more`` via request.meta.

     :param response: listing-page response.
     """
     for repos in response.css(
             'li[class="col-12 d-block width-full py-4 border-bottom public source"]'
     ):
         item = GithubItem()
         # BUG FIX: the original assignment ended with a stray trailing
         # comma, which made item['name'] a 1-tuple instead of a string
         # (and forced the item['name'][0] workaround below).  default=''
         # also avoids AttributeError when the selector matches nothing.
         item['name'] = repos.css(
             'div[class="d-inline-block mb-1"] h3 a::text'
         ).extract_first(default='').strip('\n').strip()
         item['update_time'] = repos.css(
             'relative-time::attr(datetime)').extract_first()
         # NOTE(review): the owner "shiyanlou" is hard-coded — confirm this
         # spider only ever scrapes that account.
         repos_url = "https://github.com/shiyanlou/{}".format(item['name'])
         request = scrapy.Request(repos_url, callback=self.parse_more)
         request.meta['item'] = item
         yield request
Beispiel #8
0
    def parse(self, response):
        """Scrape the repository list page, follow each repo, and paginate.

        Yields one request per public repository (item attached via
        request.meta, handled by ``parse_repo``), then follows the
        pagination link unless the "Next" control is disabled.
        """
        for repo in response.css('li.public'):
            item = GithubItem()
            name_selector = repo.xpath(
                './/a[@itemprop="name codeRepository"]/text()')
            item['name'] = name_selector.re_first(r'\n\s*(.*)')
            item['update_time'] = repo.xpath(
                './/relative-time/@datetime').extract_first()
            href = repo.xpath('.//a/@href').extract_first()
            repo_request = scrapy.Request(
                response.urljoin(href), callback=self.parse_repo)
            repo_request.meta['item'] = item
            yield repo_request

        # A disabled "Next" span marks the last page; otherwise follow the
        # last pagination anchor to the next listing page.
        disabled = response.css('div.pagination span.disabled::text')
        on_last_page = len(disabled) > 0 and disabled[-1].extract() == 'Next'
        if not on_last_page:
            next_url = response.css(
                'div.pagination a:last-child::attr(href)').extract_first()
            yield response.follow(next_url, callback=self.parse)