Ejemplo n.º 1
0
 def parse(self, response):
     """Yield one ``educationItem`` per activity card on the listing page.

     For every card the name, cover image and description are read through
     relative XPaths, and the activity time is appended to the description.
     ``museumID`` is fixed to 3 and ``self.num`` is incremented after each
     yielded item.
     """
     cards = response.xpath(
         '/html/body/div[1]/div/div[4]/div[2]/div[@class="item clearfix"]')
     for card in cards:
         item = educationItem()
         item['museumID'] = 3
         item['eduName'] = ''.join(
             card.xpath('./div[5]/div[1]/text()').extract())

         img = card.xpath('./div[4]/@data-src').extract_first()
         print(img)
         # extract_first() gives None when the card has no data-src, and the
         # attribute may also be empty; indexing img[0] would then raise and
         # abort the whole page, so test the prefix safely instead.
         if img and img.startswith("/"):
             img = "http://www.sstm.org.cn" + img
         print(img)
         item['eduImg'] = img

         when = ''.join(
             card.xpath('./div[5]/div[2]/span[2]/text()').extract())
         body = ''.join(
             card.xpath('./div[5]/div[3]/div[3]/text()').extract())
         item['eduContent'] = body + ' 活动时间:' + when
         yield item
         self.num += 1
Ejemplo n.º 2
0
 def parse(self, response):
     """Follow every activity link found in the ``train_5`` list.

     Each href is appended to the site root and requested with
     ``self.parse_content`` as the callback.
     """
     item = educationItem()  # created here but neither filled nor passed on
     hrefs = response.xpath(
         "//div[@class='train_5']//div[@class='li']//a/@href").getall()
     for href in hrefs:
         target = "http://www.zunyihy.cn" + href
         yield scrapy.Request(target, callback=self.parse_content)
Ejemplo n.º 3
0
 def parse(self, response):
     """Print the first anchor text of every list entry on the page.

     Run with: ``scrapy crawl education147``.
     """
     item = educationItem()  # created here but not populated
     entries = response.xpath('/html/body/div[3]/div[2]/div[2]/div/div/li')
     for entry in entries:
         print(entry.xpath('.//a/text()').extract_first())
 def parse(self, response):
     """Follow every link of the ``newslist`` list.

     Each href is appended to the site root and requested with
     ``self.parse_content`` as the callback.
     """
     item = educationItem()  # created here but neither filled nor passed on
     hrefs = response.xpath('//ul[@class="newslist"]/li/a/@href').getall()
     for href in hrefs:
         target = "http://www.beilin-museum.com" + href
         yield scrapy.Request(target, callback=self.parse_content)
 def parse(self, response):
     """Follow every link inside the ``li scaleimg`` blocks.

     Hrefs are resolved against the response URL and requested with
     ``self.parse_content`` as the callback.
     """
     item = educationItem()  # created here but neither filled nor passed on
     hrefs = response.xpath("//div[@class='li scaleimg']//a/@href").getall()
     for href in hrefs:
         target = response.urljoin(href)
         yield scrapy.Request(target, callback=self.parse_content)
 def parse(self, response):
     """Yield one ``educationItem`` per child ``div`` of ``#p_list``.

     The name and image path are read from the nested table of each entry;
     the image path is prefixed with the site root. The content is a fixed
     placeholder and ``museumID`` is fixed to 12. ``self.num`` is
     incremented after each yielded item.
     """
     for entry in response.xpath('//*[@id="p_list"]/div'):
         item = educationItem()

         title_parts = entry.xpath(
             './table/tbody/tr[2]/td/div/a/text()').extract()
         title = ''.join(title_parts)
         print(title)
         item['eduName'] = title

         src_parts = entry.xpath(
             './table/tbody/tr[1]/td/table/tbody/tr/td/a/img/@src').extract()
         img_url = 'http://www.ssmzd.com' + ''.join(src_parts)
         print(img_url)
         item['eduImg'] = img_url

         item['eduContent'] = '暂无'
         item['museumID'] = 12
         yield item
         self.num += 1
Ejemplo n.º 7
0
 def parse(self, response):
     """Yield one ``educationItem`` per ``<li>`` of the activity list.

     Name, image, description and time are each the concatenation of the
     strings matched by a relative XPath. The image path is prefixed with
     the site root and the time is appended to the description.
     ``museumID`` is fixed to 13 and ``self.num`` is incremented after each
     yielded item.
     """
     for entry in response.xpath('/html/body/section[2]/div[2]/div[2]/ul/li'):
         item = educationItem()

         def joined(query):
             # Concatenate every string the relative XPath matches.
             return ''.join(entry.xpath(query).extract())

         title = joined('./div[@class="title"]//text()')
         print(title)
         img_url = 'http://www.wuhouci.net.cn' + joined(
             './div[@class="l"]/img[1]/@src')
         print(img_url)
         description = joined('./div[@class="r"]/div[2]//text()')
         when = joined('./div[2]/div[3]/div[2]//text()')
         description = description + '\n时间:' + when
         print(description)

         item['eduContent'] = description
         item['museumID'] = 13
         item['eduName'] = title
         item['eduImg'] = img_url
         yield item
         self.num += 1
Ejemplo n.º 8
0
 def parse(self, response):
     """Request the detail page of every ``jchg-cont`` card on the listing.

     Each card contributes a cover image URL, stored as ``eduImg`` on a
     fresh ``educationItem``, and a detail link. The item and the detail
     URL are handed to ``self.parse_detail`` through ``meta``.
     """
     base = "https://cstm.cdstm.cn/jyhd/zkgdjt"
     cards = response.xpath(
         '/html/body/div[4]/div[3]/div[2]/div/div[2]/div/div[@class="jchg-cont"]'
     )
     for card in cards:
         href = card.xpath('./a/@href').extract_first()
         if not href:
             # No link means no detail page. Wrapping the missing value in
             # str() used to turn it into "None" and request ".../zkgdjtone".
             continue

         item = educationItem()
         src = card.xpath('./a/img/@src').extract_first()
         # The first character is dropped before prefixing the section base
         # (presumably the "." of a "./..." path -- confirm against the
         # live markup). A missing image is stored as None.
         img = base + src[1:] if src else None
         print(img)
         item['eduImg'] = img

         url_use = base + href[1:]
         yield scrapy.Request(url_use,
                              callback=self.parse_detail,
                              meta={
                                  'item': item,
                                  'url': url_use
                              })
Ejemplo n.º 9
0
 def parse(self, response):
     """Print one anchor's text and, if it has an href, request that page.

     The href is appended to the site root; the request carries a fresh
     ``educationItem`` in ``meta`` and is handled by ``self.parse_detail``.
     """
     item = educationItem()
     anchor = ('/html/body/table[4]/tbody/tr/td[3]/table/tbody/tr[1]/td'
               '/table[2]/tbody/tr[1]/td/table/tbody/tr/td[1]/a')
     print(response.xpath(anchor + '/text()').extract_first())

     hrefs = response.xpath(anchor + '/@href')
     if not hrefs:
         return
     yield scrapy.Request(
         'http://www.tengzhoumuseum.com' + hrefs.extract_first(),
         callback=self.parse_detail,
         meta={'item': item})
 def parse(self, response):
     """Print (name, description, image URL) for each record under "data".

     The response body is parsed as JSON; the image path is prefixed with
     the site root.
     """
     item = educationItem()  # created here but not populated
     for record in json.loads(response.text)["data"]:
         summary = (
             record["title"],
             record["abstracts"],
             "https://www.gzchenjiaci.com" + record["imgurl"],
         )
         print(summary)
Ejemplo n.º 11
0
 def parse(self, response):
     """Print (name, description, image URL) for each record in body.list.

     The response body is parsed as JSON.
     """
     item = educationItem()  # created here but not populated
     payload = json.loads(response.text)
     for record in payload["body"]["list"]:
         summary = (record["title"], record["description"], record["litPic"])
         print(summary)
 def parse(self, response):
     """Print (name, description, image URL) for each data.recordsList entry.

     The response body is parsed as JSON.
     """
     item = educationItem()  # created here but not populated
     payload = json.loads(response.text)
     for record in payload["data"]["recordsList"]:
         summary = (
             record["vTitle"],
             record["contentSummary"],
             record["imgUrl"],
         )
         print(summary)
Ejemplo n.º 13
0
 def parse(self, response):
     """Print (name, image URL, description) for each data.old_list entry.

     The response body is parsed as JSON; the image path is prefixed with
     the site root.
     """
     item = educationItem()  # created here but not populated
     payload = json.loads(response.text)
     for record in payload['data']['old_list']:
         # Keys are read in title / brief_desc / list_img order.
         title, brief, image = (
             record["title"],
             record["brief_desc"],
             "http://www.tibetmuseum.com.cn/" + record["list_img"],
         )
         print((title, image, brief))
Ejemplo n.º 14
0
 def parse_content(self, response):
     """Print (name, image URL, description) scraped from a detail page.

     The name is the first ``<h3>`` text, the image is the first ``<img>``
     inside ``div.conBox`` prefixed with the site root, and the description
     is the ``div.contentBox`` paragraph text with all whitespace removed.
     """
     item = educationItem()
     img_src = response.xpath("//div[@class='conBox']//img/@src").get()
     # .get() returns None when the page has no image; concatenating that
     # to the host raised TypeError, so the URL is left as None instead.
     if img_src is None:
         educationImageUrl = None
     else:
         educationImageUrl = "http://www.sunyat-sen.org" + img_src
     educationName = response.xpath("//h3/text()").get()
     educationDescription = "".join("".join(
         response.xpath(
             "//div[@class='contentBox']/p/text()").getall()).split())
     print((educationName, educationImageUrl, educationDescription))
Ejemplo n.º 15
0
 def parse(self, response):
     """Print the activity name and picture URL of each record under "data".

     The response body is parsed as JSON.
     """
     item = educationItem()  # created here but not populated
     for record in json.loads(response.text)["data"]:
         # Both keys are read before anything is printed.
         fields = (record["activityName"], record["picUrl"])
         for value in fields:
             print(value)
 def parse(self, response):
     """Print a hard-coded activity name, image URL and description.

     Nothing is read from ``response``. Run with:
     ``scrapy crawl education136``.
     """
     item = educationItem()  # created here but not populated
     fixed_fields = (
         "徽博研学游",
         "http://www.hzwhbwg.com/upfiles/image/15528711260.jpg",
         "随着我国素质教育的全面推进,学生的综合实践活动课越来越受到人们的关注和追捧。其中,研学旅行是研究性学习和旅行体验相结合的校外教育活动和综合实践活动课程,而博物馆因其丰富的文化资源以及独特的文化魅力,渐渐地成为了各类研学旅行的重要目的地之一。黄山市已成为“全国首批研学旅行目的地城市”,中国徽州文化博物馆作为全国唯一全面展现徽州文化的历史文化专题博物馆,吸引了众多大中小学校研学团队前来探秘学习。中国徽州文化博物馆日接待各地各级学生研学团队等观众1500人次以上仍为常态,同学们以班级为单位,在博物馆讲解员和志愿者的带领下,有序地进入各大展厅,欣赏着一件件文物展品,观摩着一处处复古场景,瞻仰着一尊尊人物雕像,聆听着一个个源远流长的徽州历史人文故事,一种肃然起敬的感情油然而生。不少同学纷纷感慨,在中国徽州文化博物馆,琳琅满目、造型各异的展品让人们大开眼界,更为古人精湛的技艺折服。",
     )
     for value in fixed_fields:
         print(value)
 def parse(self, response):
     """Print (name, image URL, description) for each title/image pair.

     Titles come from the anchors inside ``span.ct`` and images from
     ``li.clearfix``; the description is a fixed placeholder.
     """
     item = educationItem()
     title_list = response.xpath("//span[@class='ct']//a/text()").getall()
     img_list = response.xpath("//li[@class='clearfix']//img/@src").getall()
     educationDescription = "无介绍"
     # The two lists come from independent queries and may differ in length.
     # zip() stops at the shorter one, whereas indexing title_list by the
     # image index raised IndexError when images outnumbered titles.
     for educationName, educationImageUrl in zip(title_list, img_list):
         print((educationName, educationImageUrl, educationDescription))
Ejemplo n.º 18
0
 def parse_content(self, response):
     """Print (name, image URL, description) scraped from a detail page.

     The image is the first ``<img>`` in the detail content block, resolved
     against the response URL. The description is the block's
     paragraph/span text with all whitespace removed.
     """
     item = educationItem()  # created here but not populated
     img_src = response.xpath(
         "//div[@class='dsj-item-detail-content']//img/@src").get()
     image_url = response.urljoin(img_src)
     title = response.xpath(
         "//p[@class='nbsp-sp-detail-title']/text()").get()
     fragments = response.xpath(
         "//div[@class='dsj-item-detail-content']/p/span/text()").getall()
     description = "".join("".join(fragments).split())
     print((title, image_url, description))
 def parse(self, response):
     """Print each list entry's name and request its detail page.

     The href of every ``<li>`` anchor is appended to the site's ``/cn/``
     root and requested with ``self.parse_detail`` as the callback.
     """
     item = educationItem()
     li_list = response.xpath('/html/body/div/div[3]/div/div/div[2]/div[2]/div/div/ul/li')
     for li in li_list:
         name = ''.join(li.xpath('./a/div[2]/h3/text()').extract())
         print(name)
         href = li.xpath('./a/@href').extract_first()
         if href is None:
             # An entry without a link used to raise TypeError (str + None)
             # and stop the crawl of the whole page; skip it instead.
             continue
         detail_url = "https://www.tjbwg.com/cn/" + href
         yield scrapy.Request(url=detail_url, callback=self.parse_detail)
 def parse(self, response):
     """Print the name and description of each record under "Rows".

     The response body is parsed as JSON.
     """
     item = educationItem()  # created here but not populated
     for record in json.loads(response.text)["Rows"]:
         # Both keys are read before anything is printed.
         fields = (record["Name"], record["Describe"])
         for value in fields:
             print(value)
 def parse(self, response):
     """Print the concatenated text of one fixed ``<h2>`` element."""
     item = educationItem()  # created here but not populated
     heading_text = response.xpath(
         '/html/body/div[5]/div/div[1]/div/div[1]/h2//text()')
     print(''.join(heading_text.extract()))
 def parse_content(self, response):
     """Print (name, image URL, description) scraped from a detail page.

     The description is the text of all ``p.MsoNormal`` nodes with
     whitespace and non-breaking spaces removed. The image falls back to
     the site logo path when no ``<img>`` is found.
     """
     item = educationItem()  # created here but not populated
     title = response.xpath(
         "//div[@class='news_conent_two_title']/text()").get()

     fragments = response.xpath("//p[@class='MsoNormal']//text()").getall()
     description = "".join("".join(fragments).split())
     description = description.replace("\xa0", "")

     img_path = response.xpath("//p[@class='MsoNormal']/img/@src").get(
         default="images/main_logo.png")
     image_url = "http://www.gzsmzmuseum.cn/" + img_path
     print((title, image_url, description))
Ejemplo n.º 23
0
 def parse(self, response):
     """Print each list entry's name and link, then request its detail page.

     The href is appended to the site root; every request carries its own
     ``educationItem`` in ``meta`` for ``self.parse_detail``.
     Run with: ``scrapy crawl education160``.
     """
     div_list = response.xpath('/html/body/div[2]/div[2]/div/div[2]/div[2]/div[1]/ul/li')
     for li in div_list:
         name = li.xpath('./a/text()').extract_first()
         print(name)
         href = li.xpath('./a/@href').extract_first()
         if href is None:
             # str + None raised TypeError and aborted the page; skip instead.
             continue
         detail_url = 'https://www.shmmc.com.cn' + href
         print(detail_url)
         # A fresh item per request: one shared instance in every meta dict
         # would let each detail callback overwrite the others' fields.
         item = educationItem()
         yield scrapy.Request(detail_url, callback=self.parse_detail, meta={'item': item})
 def parse(self, response):
     """Print (name, image URL, description) for each highlighted entry.

     Titles come from ``<h4>`` anchors, descriptions and images from
     ``li.top``. The name is the first whitespace-separated token of the
     title, the description has all whitespace removed, and the image path
     is prefixed with the site root.
     """
     item = educationItem()
     # "//h4" replaces the former "///h4", which is not a well-formed XPath
     # location path.
     title_list = response.xpath("//h4/a/text()").getall()
     description_list = response.xpath(
         "//li[@class='top']/p/text()").getall()
     img_list = response.xpath("//li[@class='top']/a/img/@src").getall()
     # The three lists come from independent queries; zip() stops at the
     # shortest instead of raising IndexError on a length mismatch.
     for title, description, img in zip(title_list, description_list, img_list):
         words = title.split()
         # A title made only of whitespace has no first token.
         educationName = words[0] if words else ""
         educationDescription = "".join(description.split())
         educationImageUrl = "https://www.gzam.com.cn" + img
         print((educationName, educationImageUrl, educationDescription))
 def parse(self, response):
     """Print each list entry's name and link, then request its detail page.

     Requests are handled by ``self.parse_detail``.
     Run with: ``scrapy crawl education124``.
     """
     item = educationItem()
     _list = response.xpath('/html/body/div[1]/div[3]/div/div[2]/ul/li')
     for li in _list:
         name = li.xpath('./a/text()').extract_first()
         print(name)
         detail_url = li.xpath('./a/@href').extract_first()
         print(detail_url)
         if detail_url is None:
             # scrapy.Request rejects a None URL; skip entries without a link.
             continue
         # urljoin leaves absolute URLs untouched and resolves relative ones
         # against the page, which Request would otherwise reject for
         # lacking a scheme.
         yield scrapy.Request(url=response.urljoin(detail_url),
                              callback=self.parse_detail)
Ejemplo n.º 26
0
    def parse(self, response):
        """Print the first text node of one fixed ``<b>`` element."""
        item = educationItem()  # created here but not populated
        title_nodes = response.xpath(
            '/html/body/div[6]/div[4]/div[1]/div[9]/div/table/tbody/tr[1]/td/table[2]/tbody/tr[1]/td[2]/a/b/text()'
        )
        print(title_nodes.extract_first())
Ejemplo n.º 27
0
 def parse(self, response):
     """Print each list entry's name and request its detail page.

     The href is appended to the site root; every request carries its own
     ``educationItem`` in ``meta`` for ``self.parse_detail``.
     """
     edu_list = response.xpath('/html/body/div[3]/div/div[2]/ul/li')
     for div in edu_list:
         edu_name = div.xpath('./a/text()').extract_first()
         print(edu_name)

         href = div.xpath('./a/@href').extract_first()
         if href is None:
             # str + None raised TypeError and aborted the page; skip instead.
             continue
         detail_url = 'http://museum.linyi.cn/' + href
         # A fresh item per request: one shared instance in every meta dict
         # would let each detail callback overwrite the others' fields.
         item = educationItem()
         yield scrapy.Request(detail_url, callback=self.parse_detail, meta={'item': item})
 def parse(self, response):
     """Print each entry's title and image URL, then request its detail page.

     Entries are the child ``div`` elements of ``#c``. Link and image paths
     are prefixed with the site root; every request carries its own
     ``educationItem`` in ``meta`` for ``self.parse_detail``.
     """
     edu_list = response.xpath('//*[@id="c"]/div')
     for div in edu_list:
         edu_name = div.xpath('./div[3]/a/@title').extract_first()
         print(edu_name)

         href = div.xpath('./div[3]/a/@href').extract_first()
         if href is None:
             # str + None raised TypeError and aborted the page; skip instead.
             continue
         detail_url = 'http://museum.sdu.edu.cn/' + href

         src = div.xpath('./div[2]/a/img/@src').extract_first()
         # A missing image no longer raises; it is reported as None.
         edu_img = ('http://museum.sdu.edu.cn' + src) if src is not None else None
         print(edu_img)

         # A fresh item per request: one shared instance in every meta dict
         # would let each detail callback overwrite the others' fields.
         item = educationItem()
         yield scrapy.Request(detail_url, callback=self.parse_detail, meta={'item': item})
    def parse(self, response):
        """Request the detail page linked from the first anchor of each div.

        The href is appended to the site root; every request carries its
        own ``educationItem`` in ``meta`` for ``self.parse_detail``.
        """
        edu_list = response.xpath('/html/body/div[5]/div/div/div[2]/div')
        for div in edu_list:
            href = div.xpath('./a[1]/@href').extract_first()
            if href is None:
                # The request used to be yielded outside the link check, so
                # a div without a link either hit an unbound detail_url or
                # re-requested the previous div's URL.
                continue
            detail_url = 'http://www.qdyzyzmuseum.com' + href
            # A fresh item per request: one shared instance in every meta
            # dict would let each detail callback overwrite the others.
            item = educationItem()
            yield scrapy.Request(detail_url,
                                 callback=self.parse_detail,
                                 meta={'item': item})
Ejemplo n.º 30
0
 def parse(self, response):
     """Print each list entry's name and link, then request its detail page.

     Entries are the ``<li>`` elements of the ``articleBox_list`` block.
     The href is appended to the site root; every request carries its own
     ``educationItem`` in ``meta`` for ``self.parse_detail``.
     Run with: ``scrapy crawl education130``.
     """
     div_list = response.xpath('//*[@class="articleBox_list"]/ul/li')
     for li in div_list:
         name = li.xpath('./a/text()').extract_first()
         print(name)
         href = li.xpath('./a/@href').extract_first()
         if href is None:
             # str + None raised TypeError and aborted the page; skip instead.
             continue
         detail_url = 'http://www.qzhjg.cn' + href
         print(detail_url)
         # A fresh item per request: one shared instance in every meta dict
         # would let each detail callback overwrite the others' fields.
         item = educationItem()
         yield scrapy.Request(detail_url,
                              callback=self.parse_detail,
                              meta={'item': item})