コード例 #1
0
    def parse(self, response):
        """Yield an item with name and last-update time for each public repo <li>."""
        for repository in response.css('li.public'):
            # BUG FIX: the original body referenced the undefined name
            # `repositories` (the loop variable is `repository`), which raised
            # NameError on the first iteration. Mixed tab/space indentation
            # is also normalized to 4 spaces.
            item = ShiyanlougithubItem({
                'name': repository.css('a[itemprop="name codeRepository"]::text').extract_first().strip(),
                'update_time': repository.css('relative-time::attr(datetime)').extract_first()
            })
            yield item
コード例 #2
0
ファイル: shiyanlou.py プロジェクト: sjylawyer/shiyanlou
 def parse(self, response):
     """Yield one item per public repository with its name and update time."""
     for repo_sel in response.css('li.public'):
         repo_name = repo_sel.xpath('.//a[@itemprop="name codeRepository"]/text()').re_first(r'\n\s*(.*)')
         repo_updated = repo_sel.xpath('.//relative-time/@datetime').extract_first()
         yield ShiyanlougithubItem({'name': repo_name, 'update_time': repo_updated})
コード例 #3
0
 def parse(self, response):
     """Yield an item for every repository row, echoing each to stdout."""
     for row in response.css('li[class="col-12 d-block width-full py-4 border-bottom public source"]'):
         repo_name = row.css("h3 a::text").re_first('[^\S]*(\S+)[^\S]*')
         repo_updated = row.css("div.f6.text-gray.mt-2 relative-time::attr(datetime)").extract_first()
         item = ShiyanlougithubItem({"name": repo_name, "update_time": repo_updated})
         print(item)  # debug echo kept from the original
         yield item
コード例 #4
0
ファイル: githubs.py プロジェクト: zmxhdu/shiyanlou
 def parse(self, response):
     """Collect name/update_time per repository, then follow its detail page.

     The partially-filled item travels in request.meta to parse_detail.
     """
     for github in response.xpath('.//li[contains(@itemprop,"owns")]'):
         item = ShiyanlougithubItem()
         # BUG FIX: the original assignment ended with a stray trailing comma,
         # which wrapped the stripped name string in a one-element tuple.
         item['name'] = github.xpath('.//a[contains(@itemprop,"name codeRepository")]/text()[1]').extract_first().strip()
         item['update_time'] = github.xpath('.//div[@class="f6 text-gray mt-2"]/relative-time/@datetime').extract_first()
         github_url = response.urljoin(github.xpath('.//div[@class="d-inline-block mb-1"]/h3/a/@href').extract_first())
         request = scrapy.Request(github_url, callback=self.parse_detail)
         request.meta['item'] = item
         yield request
コード例 #5
0
 def parse(self, response):
     """Yield repository items parsed from the listing page."""
     for selector in response.css('li.py-4'):
         name = selector.xpath('.//a[contains(@itemprop,"codeRe")]/text()').re_first('\s*(.+)')
         update_time = selector.xpath('.//relative-time/@datetime').extract_first()
         yield ShiyanlougithubItem({'name': name, 'update_time': update_time})
コード例 #6
0
ファイル: repositories.py プロジェクト: lo-nmei/scrapy
 def parse(self, response):
     """Yield one item per repository row; the trailing 'Z' is stripped from timestamps."""
     for row in response.css('li.col-12'):
         yield ShiyanlougithubItem({
             'name': row.xpath('.//h3/a/text()').extract_first().strip(),
             'update_time': row.xpath('.//relative-time/@datetime').re_first('(.+)Z'),
         })
コード例 #7
0
ファイル: sylgithub.py プロジェクト: WendyLuo0625/Scrapy
 def parse(self, response):
     """Walk the user's repository list and yield name/update_time items."""
     for li in response.css('div#user-repositories-list ul li'):
         raw_name = li.xpath('.//div/h3/a/text()').extract_first()
         timestamp = li.xpath('.//div/relative-time/@datetime').extract_first()
         yield ShiyanlougithubItem({'name': raw_name.strip(), 'update_time': timestamp})
コード例 #8
0
    def parse(self, response):
        """Build an item per repository and follow its page via request.meta."""
        for repo in response.css('li.col-12'):
            item = ShiyanlougithubItem()
            # BUG FIX: the original assignment ended with a stray trailing
            # comma, storing a one-element tuple instead of the name string.
            item['name'] = repo.css('li.col-12 div.d-inline-block a::text').re_first("\n\s* (.+)")
            item['update_time'] = repo.css('li.col-12 div.f6 relative-time::attr(datetime)').extract_first()
            repository_url = response.urljoin(repo.css('li.col-12 div.d-inline-block a::attr(href)').extract_first())
            request = scrapy.Request(repository_url, callback=self.parse_details)
            request.meta['item'] = item
            yield request
コード例 #9
0
 def parse(self, response):
     """Yield items by scraping each repo header div plus its sibling metadata."""
     for block in response.xpath('//div[@class="d-inline-block mb-1"]'):
         repo_name = block.xpath('.//h3/a/text()').re_first('^\s*(.*)')
         # The datetime is regex-extracted from the serialized <relative-time>
         # element found via the parent's metadata div.
         updated = block.xpath('..//div[@class="f6 text-gray mt-2"]/relative-time').re_first('<relative-time datetime="(.*)"')
         yield ShiyanlougithubItem({'name': repo_name, 'update_time': updated})
コード例 #10
0
 def parse(self, response):
     """Scrape name/update_time/description per repo, then request the repo page."""
     for repository in response.css('li.public'):
         item = ShiyanlougithubItem()
         item['name'] = repository.xpath('.//a[@itemprop="name codeRepository"]/text()').re_first("\n\s*(.*)")
         item['update_time'] = repository.xpath('.//relative-time/@datetime').extract_first()
         # BUG FIX: the description was queried off `response`, so every item
         # received the first description found on the whole page; scope the
         # query to the current repository <li> instead.
         item['description'] = repository.xpath('.//p[@itemprop="description"]/text()').re_first('\n\s*(.*)\s')
         repo_url = response.urljoin(repository.xpath('.//a/@href').extract_first())
         request = scrapy.Request(repo_url, callback=self.parse_repo)
         # Stash the partially-filled item in meta so parse_repo can finish it.
         request.meta['item'] = item
         yield request
コード例 #11
0
ファイル: repository.py プロジェクト: wbhhbw/shiyanlou-001
 def parse(self, response):
     """Yield an item for each repository row in the listing."""
     for row in response.css('li.col-12'):
         name = row.xpath('.//a[contains(@itemprop, "name codeRepository")]/text()').re_first('[^\w]*(\w*)')
         updated = row.xpath('.//relative-time/@datetime').extract_first()
         yield ShiyanlougithubItem({"name": name, "update_time": updated})
コード例 #12
0
ファイル: craw_github.py プロジェクト: Stenphen82/shiyanlou
 def parse(self, response):
     """Yield name/update_time items for every repository list entry."""
     for entry in response.css('li.col-12'):
         entry_name = entry.css('li.col-12 div.d-inline-block a::text').re_first("\n\s* (.+)")
         entry_time = entry.css('li.col-12 div.f6 relative-time::attr(datetime)').extract_first()
         yield ShiyanlougithubItem({'name': entry_name, 'update_time': entry_time})
コード例 #13
0
ファイル: repositories.py プロジェクト: lo-nmei/scrapy
 def parse(self, response):
     """Yield a request per repository row, carrying the scraped item in meta."""
     for rep in response.css('li.col-12'):
         item = ShiyanlougithubItem()
         item['name'] = rep.xpath('.//h3/a/text()').extract_first().strip()
         item['update_time'] = rep.xpath('.//relative-time/@datetime').re_first('(.+)Z')
         # BUG FIX: the <li> element itself carries no @href attribute, so the
         # original `rep.xpath('@href')` always extracted None and urljoin/
         # Request failed; read the repository link's href instead.
         course_url = response.urljoin(rep.xpath('.//h3/a/@href').extract_first())
         request = scrapy.Request(url=course_url,
                                  callback=self.parse_course)
         request.meta['item'] = item
         yield request
コード例 #14
0
 def parse(self, response):
     """Scrape each repo row, then follow the repo link with the item in meta."""
     for repo in response.xpath("//ul[contains(@data-filterable-type, 'substring')]/li"):
         # Extract the fields for this row with XPath.
         item = ShiyanlougithubItem({
             "name": repo.xpath(".//div[1]/h3/a/text()").re_first('[\n ]*([\d\w]*)[\n ]*'),
             "update_time": repo.xpath(".//div[3]/relative-time/@datetime").extract_first()
         })
         # BUG FIX: the original built the URL from the undefined name
         # `repository`; the loop variable here is `repo`.
         repo_url = response.urljoin(repo.xpath('.//a/@href').extract_first())
         request = scrapy.Request(repo_url, callback=self.parse_repo)
         request.meta['item'] = item
         yield request
コード例 #15
0
	def parse(self, response):
		"""List repositories, print debug info, and follow each repo link."""
		print(response)  # debug: show which response is being parsed
		for entry in response.css('div#user-repositories-list ul li'):
			item = ShiyanlougithubItem({
				'name': entry.css('a::text').extract_first(),
				'update_time': entry.css('relative-time::attr(datetime)').extract_first()
			})
			detail_url = response.urljoin(entry.css('a::attr(href)').extract_first())
			print('url', detail_url)  # debug: the URL about to be requested
			request = scrapy.Request(detail_url, callback=self.parse_detail)
			request.meta['item'] = item
			yield request
コード例 #16
0
ファイル: github.py プロジェクト: adaonline/pythonCourse
 def parse(self, response):
     """Scrape each public repo and follow its page (duplicate URLs allowed)."""
     for course in response.css('li.public'):
         item = ShiyanlougithubItem()
         item["name"] = course.css("h3 a::text").re_first('[^\S]*(\S+)[^\S]*')
         item["update_time"] = course.css("div.f6.text-gray.mt-2 relative-time::attr(datetime)").extract_first()
         url = response.urljoin(course.css("a::attr(href)").extract_first())
         print('#' * 20 + url)  # debug marker kept from the original
         # dont_filter lets scrapy revisit URLs its dedupe filter already saw.
         request = scrapy.Request(url, callback=self.parse_pro, dont_filter=True)
         request.meta['item'] = item
         yield request
コード例 #17
0
 def parse(self, response):
     """Yield one item per <li> in the filterable repository list."""
     rows = response.xpath("//ul[contains(@data-filterable-type, 'substring')]/li")
     for row in rows:
         # Pull the fields for this row via positional XPath.
         name = row.xpath(".//div[1]/h3/a/text()").re_first('[\n ]*([\d\w]*)[\n ]*')
         updated = row.xpath(".//div[3]/relative-time/@datetime").extract_first()
         yield ShiyanlougithubItem({"name": name, "update_time": updated})
コード例 #18
0
 def parse(self, response):
     """Build an item per public repo and chain into parse_new for details."""
     for node in response.css('li.public'):
         repo_name = node.xpath('.//a[@itemprop="name codeRepository"]/text()').re_first("\n\s*(.*)")
         repo_time = node.xpath('.//relative-time/@datetime').extract_first()
         item = ShiyanlougithubItem()
         item['name'] = repo_name
         item['update_time'] = repo_time
         new_url = response.urljoin(node.xpath('.//a/@href').extract_first())
         request = scrapy.Request(new_url, callback=self.parse_new)
         request.meta['item'] = item
         yield request
コード例 #19
0
 def parse(self, response):
     """Scrape each public repository and follow it with the item in meta."""
     for node in response.css("li.public"):
         repo_name = node.xpath(".//a[@itemprop='name codeRepository']/text()").re_first("\n +(.+)")
         repo_time = node.xpath(".//relative-time//@datetime").extract_first()
         item = ShiyanlougithubItem({"name": repo_name, "update_time": repo_time})
         repo_url = response.urljoin(node.xpath(".//a/@href").extract_first())
         request = scrapy.Request(repo_url, callback=self.parse_repo)
         request.meta['item'] = item
         yield request
コード例 #20
0
    def parse(self, response):
        """Per repo header div: scrape name and time, then follow the repo link."""
        for course in response.xpath('//div[@class="d-inline-block mb-1"]'):
            item = ShiyanlougithubItem()
            item['name'] = course.xpath('.//h3/a/text()').re_first('^\s*(.*)')
            # The datetime is regex-extracted from the serialized
            # <relative-time> element in the parent's metadata div.
            item['update_time'] = course.xpath('..//div[@class="f6 text-gray mt-2"]/relative-time').re_first('<relative-time datetime="(.*)"')
            reo_url = response.urljoin(course.xpath('.//h3/a/@href').extract_first())
            request = scrapy.Request(reo_url, callback=self.parse_cbr)
            request.meta['item'] = item
            yield request
コード例 #21
0
 def parse(self, response):
     """Yield a detail-page request per repository, carrying the scraped item."""
     for node in response.css('li.col-12'):
         item = ShiyanlougithubItem()
         item['name'] = node.xpath('.//a[contains(@itemprop, "name codeRepository")]/text()').re_first('[^\w]*(\w*)')
         item['update_time'] = node.xpath('.//relative-time/@datetime').extract_first()
         href = node.xpath('.//a[contains(@itemprop, "name codeRepository")]/@href').extract_first()
         request = scrapy.Request(url=response.urljoin(href), callback=self.parse_repos)
         request.meta['item'] = item
         yield request
コード例 #22
0
    def parse(self, response):
        """Scrape each repository row and follow its page with the item in meta.

        NOTE(review): the CSS/XPath selector strings below use backslash
        line-continuations, so the continuation lines' leading whitespace is
        embedded in the selector text. CSS treats runs of whitespace as a
        single descendant combinator and XPath ignores trailing whitespace,
        so the selectors still work — but the literals are fragile to edit.
        """
        for repository in response.css('div#user-repositories-list > ul > li'):
            item = ShiyanlougithubItem()

            item['name'] = repository.css('div.d-inline-block.mb-1 \
            h3 a::text').re_first(r'[^\S]*(.+)[^\S]*')

            item['update_time'] = repository.css('div.f6.text-gray.mt-2 \
            relative-time::attr(datetime)').extract_first()

            # Follow the repository link; the item travels in request.meta.
            repository_url = response.urljoin(repository.xpath('.//a/@href\
            ').extract_first())
            request = scrapy.Request(repository_url, callback=self.parse_repo)
            request.meta['item'] = item

            yield request
コード例 #23
0
ファイル: repositories.py プロジェクト: Esun127/shiyanlou
    def parse(self, response):
        """Yield items for this page, then queue the 'Next' pagination page."""
        for item in response.xpath('//li[contains(@class, "width-full")]'):
            yield ShiyanlougithubItem({
                'name':
                item.xpath('.//div[contains(@class, "mb-1")]/h3/a/text()'
                           ).re_first(r'\s+(\S+)'),
                'update_time':
                item.xpath(
                    './/div[contains(@class, "f6")]/relative-time/@datetime').
                extract_first()
            })

        # Find the "Next" pagination link, if any, and re-enter this callback.
        nextpg = response.xpath('//div[@class="pagination"]/a')
        for page in nextpg:
            if page.xpath('.//text()').extract_first() == 'Next':
                url = page.xpath('.//@href').extract_first()
                # ROBUSTNESS: the href may be page-relative; urljoin makes it
                # absolute (a no-op for already-absolute URLs), so
                # scrapy.Request never rejects it as a relative URL.
                yield scrapy.Request(url=response.urljoin(url),
                                     callback=self.parse)
コード例 #24
0
 def parse(self, response):
     """Yield (name, update_time) items from the user's repository list."""
     for entry in response.css('div#user-repositories-list li'):
         yield ShiyanlougithubItem(
             name=entry.css('h3 a::text').re_first(' +(.*)'),
             update_time=entry.css('relative-time::attr(datetime)').extract_first(),
         )
コード例 #25
0
 def parse(self, response):
     """Yield stripped repo names with their last-update timestamps."""
     for entry in response.css('li.col-12'):
         raw_name = entry.css('div.d-inline-block a::text').extract_first()
         updated = entry.css('div.f6 relative-time::attr(datetime)').extract_first()
         yield ShiyanlougithubItem({'name': raw_name.strip(), 'update_time': updated})