Exemple #1
0
    def parse(self, response):
        """Fill the item for museum 120 (Xi'an Museum) and request its detail page.

        Name, address and link are fixed values; opening hours and telephone
        are read from ``response`` via XPath. The item is handed to
        ``self.parse_detail`` through the request ``meta``.
        """

        def scrub(raw):
            # Swap both the literal text "\xa0" and real non-breaking spaces
            # for ordinary spaces.
            return str(raw).replace(u'\\xa0', u' ').replace(u'\xa0', u' ')

        hours = response.xpath(
            'normalize-space(/html//div[4]/div[2]/div[2]/text()[1])'
        ).extract_first()
        phone = response.xpath(
            'normalize-space(/html//div[4]/div[2]/div[2]/text()[2])'
        ).extract_first()

        museum = MuseumsItem()
        museum["museumID"] = 120
        museum["museumName"] = '西安博物院'
        museum["Location"] = '陕西省西安市南门外友谊西路'
        museum["Link"] = 'http://www.xabwy.com'
        museum["opentime"] = scrub(hours)
        museum["telephone"] = scrub(phone)

        # Continue on the detail page, carrying the item along in meta.
        detail_request = scrapy.Request(
            'http://www.xabwy.com/Statics/2020.01/66.html',
            callback=self.parse_detail,
            meta={"item": museum},
        )
        yield detail_request
Exemple #2
0
    def parse(self, response):
        """Fill the item for museum 107 (Zunyi Conference Memorial Hall).

        Address, opening hours and telephone are read from ``response`` via
        XPath and stripped of non-breaking spaces; name and link are fixed
        values. The item is handed to ``self.parse_detail`` through the
        request ``meta``.
        """

        def scrub(text):
            # Remove both the literal text "\xa0" and real non-breaking spaces.
            return text.replace(u'\\xa0', u'').replace(u'\xa0', u'')

        item = MuseumsItem()
        item["museumID"] = 107
        item["museumName"] = '遵义会议纪念馆'
        # default='' matters here: a plain text() query yields None when the
        # node is missing, and str(None) used to store the literal "None".
        item["Location"] = scrub(response.xpath(
            '/html//div[4]/div[2]/div/div[1]/div/div[2]/div[3]/div[1]/text()'
        ).extract_first(default=''))
        item["Link"] = 'http://www.zunyihy.cn/'
        item["opentime"] = scrub(response.xpath(
            'normalize-space(/html//div[4]/div[2]/div/div[1]/div/div[2]/div[2])'
        ).extract_first(default=''))
        item["telephone"] = scrub(response.xpath(
            '/html/body/div[4]/div[2]/div/div[1]/div/div[2]/div[3]/div[3]/text()'
        ).extract_first(default=''))
        url = 'http://www.zunyihy.cn/about.html#about2'

        # Continue on the detail page, carrying the item along in meta.
        yield scrapy.Request(
            url,
            callback=self.parse_detail,
            meta={"item": item}
        )
Exemple #3
0
    def parse(self, response):
        """Fill the item for museum 122 (Tang West Market Museum).

        Address and telephone are read from ``response`` via XPath and have
        non-breaking spaces replaced by plain spaces; name, link and opening
        hours are fixed values. The item is handed to ``self.parse_detail``
        through the request ``meta``.
        """

        def scrub(raw):
            # Swap both the literal text "\xa0" and real non-breaking spaces
            # for ordinary spaces.
            return str(raw).replace(u'\\xa0', u' ').replace(u'\xa0', u' ')

        item = MuseumsItem()
        item["museumID"] = 122
        item["museumName"] = '大唐西市博物馆'
        item["Location"] = scrub(response.xpath(
            'normalize-space(/html/body/div/form/div[4]/div[2]/div[2]/p[5]/span)'
        ).extract_first())
        item["Link"] = 'http://www.dtxsmuseum.com/'
        # The winter ticketing cut-off previously read "16::0" (typo).
        item["opentime"] = (
            '全年开放(每周一及除夕闭馆,法定节假日正常开放),'
            '夏季:9:00-17:30(16:30停止票务办理),'
            '冬季:9:00-17:00(16:00停止票务办理)'
        )
        item["telephone"] = scrub(response.xpath(
            'normalize-space(/html/body/div/form/div[4]/div[2]/div[2]/p[9]/span)'
        ).extract_first())
        url = 'http://www.dtxsmuseum.com/news_show.aspx?id=1'

        # Continue on the detail page, carrying the item along in meta.
        yield scrapy.Request(
            url,
            callback=self.parse_detail,
            meta={"item": item}
        )
Exemple #4
0
    def parse(self, response):
        """Fill the item for museum 111 (Chongqing Hongyan History Museum).

        Every field is a fixed value; ``response`` is not read. The item is
        handed to ``self.parse_detail`` through the request ``meta``.
        """
        fixed_fields = (
            ("museumID", 111),
            ("museumName", '重庆红岩历史博物馆'),
            ("Location", '重庆市渝中区红岩村52号'),
            ("Link", 'http://www.hongyan.info/'),
            ("opentime", '1月1日-12月31日 09:00-17:00'),
            ("telephone", '023-63300192 63303065'),
        )
        museum = MuseumsItem()
        for field_name, field_value in fixed_fields:
            museum[field_name] = field_value

        # Continue on the detail page, carrying the item along in meta.
        yield scrapy.Request(
            'http://www.hongyan.info/',
            callback=self.parse_detail,
            meta={"item": museum},
        )
Exemple #5
0
 def parse(self, response):
     """Fill the item for museum 119 (Xi'an Banpo Museum).

     Every field is a fixed value; ``response`` is not read. The item is
     handed to ``self.parse_detail`` through the request ``meta``.
     """
     fixed_fields = (
         ("museumID", 119),
         ("museumName", '西安半坡博物馆'),
         ("Location", '西安市半坡路155号'),
         ("Link", 'http://www.banpomuseum.com.cn/'),
         ("opentime", '旺季:(3月1日-11月30日)8:00----17:30  淡季:(12月1日-2月底)8:00----17:00'),
         ("telephone", '联系电话:029-62815385 投诉电话:18729251954'),
     )
     museum = MuseumsItem()
     for field_name, field_value in fixed_fields:
         museum[field_name] = field_value

     # Continue on the detail page, carrying the item along in meta.
     yield scrapy.Request(
         'http://www.banpomuseum.com.cn/',
         callback=self.parse_detail,
         meta={"item": museum},
     )
Exemple #6
0
    def parse(self, response):
        """Fill the item for museum 113 (Tibet Museum).

        Every field is a fixed value; ``response`` is not read. The item is
        handed to ``self.parse_detail`` through the request ``meta``.
        """
        fixed_fields = (
            ("museumID", 113),
            ("museumName", '西藏博物馆'),
            ("Location", '西藏自治区拉萨市城关区民族南路2号'),
            ("Link", 'http://www.tibetmuseum.com.cn'),
            ("opentime", '夏秋季(5月1日至10月31日):09:30-17:30 (17:00游客停止入场)冬春季(11月1日至次年4月30日):10:30-17:00(16:30游客停止入场)'),
            ("telephone", '0891-6835244 0891-6812210'),
        )
        museum = MuseumsItem()
        for field_name, field_value in fixed_fields:
            museum[field_name] = field_value

        # Continue on the detail page, carrying the item along in meta.
        yield scrapy.Request(
            'http://www.tibetmuseum.com.cn/zh-CN/brief/historyRecord?isNav=yes&navIndex=1',
            callback=self.parse_detail,
            meta={"item": museum},
        )
Exemple #7
0
    def parse(self, response):
        """Fill the item for museum 105 (Chengdu Jinsha Site Museum).

        Only the telephone is read from ``response``; it is stored exactly as
        ``extract_first()`` returns it. All other fields are fixed values.
        The item is handed to ``self.parse_detail`` through the request
        ``meta``.
        """
        phone_nodes = response.xpath(
            "/html/body/div[4]/div[1]/div[1]/div/a[5]/text()")

        museum = MuseumsItem()
        museum["museumID"] = 105
        museum["museumName"] = '成都金沙遗址博物馆'
        museum["Location"] = '四川省成都市青羊区金沙遗址路2号'
        museum["Link"] = 'http://www.jinshasitemuseum.com/'
        museum["opentime"] = '夏令时8:30-20:00;冬令时8:30-18:30;周一闭馆'
        museum["telephone"] = phone_nodes.extract_first()

        # Continue on the detail page, carrying the item along in meta.
        detail_request = scrapy.Request(
            'http://www.jinshasitemuseum.com/About/Introduction',
            callback=self.parse_detail,
            meta={"item": museum},
        )
        yield detail_request
Exemple #8
0
    def parse(self, response):
        """Fill the item for museum 117 (Hanyangling Museum).

        Only the opening hours are read from ``response`` (whitespace
        normalised by the XPath itself); all other fields are fixed values.
        The item is handed to ``self.parse_detail`` through the request
        ``meta``.
        """
        hours_nodes = response.xpath(
            "normalize-space(/html/body/div[3]/div[2]/div[2]/div/div[1]/div[2]/text()[1])"
        )

        museum = MuseumsItem()
        museum["museumID"] = 117
        museum["museumName"] = '汉阳陵博物馆'
        museum["Location"] = '地址:西安咸阳国际机场专线公路东段'
        museum["Link"] = 'http://www.hylae.com/'
        museum["opentime"] = hours_nodes.extract_first()
        museum["telephone"] = '029-62657569'

        # Continue on the detail page, carrying the item along in meta.
        detail_request = scrapy.Request(
            'http://www.hylae.com/index.php?ac=article&at=list&tid=10',
            callback=self.parse_detail,
            meta={"item": museum},
        )
        yield detail_request
Exemple #9
0
    def parse(self, response):
        """Fill the item for museum 114 (Shaanxi History Museum).

        Address and telephone are read from ``response`` and stored exactly
        as ``extract_first()`` returns them; name, link and opening hours are
        fixed values. The item is handed to ``self.parse_detail`` through the
        request ``meta``.
        """
        address = response.xpath(
            "/html/body/div[3]/div/div[2]/p[3]/text()").extract_first()
        phone = response.xpath(
            "/html/body/div[3]/div/div[2]/p[1]/text()").extract_first()

        museum = MuseumsItem()
        museum["museumID"] = 114
        museum["museumName"] = '陕西历史博物馆'
        museum["Location"] = address
        museum["Link"] = 'http://www.sxhm.com/'
        museum["opentime"] = '周二至周日 09:00-17:30;遇法定节假日周一除外'
        museum["telephone"] = phone

        # Continue on the detail page, carrying the item along in meta.
        detail_request = scrapy.Request(
            'http://www.sxhm.com/index.php?ac=article&at=list&tid=230',
            callback=self.parse_detail,
            meta={"item": museum},
        )
        yield detail_request
Exemple #10
0
    def parse(self, response):
        """Fill the item for museum 115 (Terracotta Warriors museum).

        Opening hours and telephone are read from ``response`` and stored
        exactly as ``extract_first()`` returns them; name, address and link
        are fixed values. The item is handed to ``self.parse_detail`` through
        the request ``meta``.
        """
        item = MuseumsItem()
        item["museumID"] = 115
        item["museumName"] = '秦始皇帝陵兵马俑博物馆'
        # The address used to be a Zhengzhou (Henan) street address, which
        # does not belong to this museum; it is in Lintong District, Xi'an.
        item["Location"] = '陕西省西安市临潼区秦陵北路'
        item["Link"] = 'http://bmy.com.cn/'
        item["opentime"] = response.xpath(
            "/html/body/div[4]/div[1]/div[3]/div/div[2]/div[2]/text()"
        ).extract_first()
        item["telephone"] = response.xpath(
            "/html/body/div[3]/div/div/div[2]/p/span/text()").extract_first()
        url = 'http://www.bmy.com.cn/html/gov/jggk/8eaf8a3015b643b7adcb9d6815e0f845.html'

        # Continue on the detail page, carrying the item along in meta.
        yield scrapy.Request(
            url,
            callback=self.parse_detail,
            meta={"item": item}
        )
Exemple #11
0
    def parse(self, response):
        """Fill the item for museum 106 (Zigong Salt Industry History Museum).

        Address and telephone are read from ``response`` via XPath; the
        ideographic space U+3000 is removed from the address and replaced by
        two plain spaces in the telephone. Name, link and opening hours are
        fixed values. The item is handed to ``self.parse_detail`` through the
        request ``meta``.
        """
        wide_space = u'\u3000'
        address = response.xpath(
            "normalize-space(/html/body/div[6]/div/div[2]/p[1])"
        ).extract_first()
        phone = response.xpath(
            "normalize-space(/html/body/div[6]/div/div[2]/p[2]/text())"
        ).extract_first()

        museum = MuseumsItem()
        museum["museumID"] = 106
        museum["museumName"] = '自贡市盐业历史博物馆'
        museum["Location"] = str(address).replace(wide_space, u'')
        museum["Link"] = 'http://www.zgshm.cn/index.html'
        museum["opentime"] = '1月1日-12月31日 08:30-16:30'
        museum["telephone"] = str(phone).replace(wide_space, u'  ')

        # Continue on the detail page, carrying the item along in meta.
        detail_request = scrapy.Request(
            'http://www.zgshm.cn/content.jsp?id=297e0fc26362ffbb016380a82d360199',
            callback=self.parse_detail,
            meta={"item": museum},
        )
        yield detail_request
Exemple #12
0
    def parse(self, response):
        """Fill the item for museum 110 (Chongqing China Three Gorges Museum).

        Address and telephone are read from ``response`` and stored exactly
        as ``extract_first()`` returns them; name, link and opening hours are
        fixed values. The item is handed to ``self.parse_detail`` through the
        request ``meta``.
        """
        address = response.xpath(
            "/html//div/div/div[5]/div[2]/div[2]/div/div[2]/ul/li[5]/p/text()"
        ).extract_first()
        phone = response.xpath(
            "/html//div/div/div[5]/div[2]/div[2]/div/div[2]/ul/li[1]/p/text()"
        ).extract_first()

        museum = MuseumsItem()
        museum["museumID"] = 110
        museum["museumName"] = '重庆中国三峡博物馆'
        museum["Location"] = address
        museum["Link"] = 'http://www.3gmuseum.cn/'
        museum["opentime"] = '每日9:00-17:00(16:00禁止入馆) 周一闭馆(法定节假日除外)'
        museum["telephone"] = phone

        # Continue on the detail page, carrying the item along in meta.
        detail_request = scrapy.Request(
            'http://www.3gmuseum.cn/web/article/toArticleNo.do?articleno=1&base=&fullPath=http%3A%2F%2Fwww.3gmuseum.cn&type=&itemsonno=12121212&topitemno=402880b25a3bb962015a3bc512212223&itemno=402880b25a3bb962015a3bc512212223',
            callback=self.parse_detail,
            meta={"item": museum},
        )
        yield detail_request
Exemple #13
0
    def parse(self, response):
        """Fill the item for museum 116 (Yan'an Revolutionary Memorial Hall).

        Address and telephone are read from ``response`` and stored exactly
        as ``extract_first()`` returns them; name, link and opening hours are
        fixed values. The item is handed to ``self.parse_detail`` through the
        request ``meta``.
        """
        item = MuseumsItem()
        item["museumID"] = 116
        item["museumName"] = '延安革命纪念馆'
        # Select the text node, as the telephone query below does; without
        # /text() the stored value was the serialized <strong> element.
        item["Location"] = response.xpath(
            "/html/body/div[2]/div[2]/div[2]/div/div[2]/p[2]/span/strong/text()"
        ).extract_first()
        item["Link"] = 'http://www.yagmjng.com/'
        item["opentime"] = '每日09:00至17:00(16:00停止入馆)'
        item["telephone"] = response.xpath(
            "/html/body/div[2]/div[2]/div[2]/div/div[2]/p[4]/span/strong/text()"
        ).extract_first()
        url = 'http://www.yagmjng.com/rsf/site/jinianguan/zhanguanjianjie/info/2020/81013.html'

        # Continue on the detail page, carrying the item along in meta.
        yield scrapy.Request(
            url,
            callback=self.parse_detail,
            meta={"item": item}
        )
Exemple #14
0
    def parse(self, response):
        """Fill the item for museum 112 (Chongqing Natural History Museum).

        Address, opening hours and telephone are read from ``response`` via
        XPath and stored as returned; name and link are fixed values. The
        item is handed to ``self.parse_detail`` through the request ``meta``.
        """
        address = response.xpath(
            'normalize-space(//div[3]/div[5]//div[3]/div[1]/text())'
        ).extract_first()
        hours = response.xpath(
            '//div[3]/div[1]//div[3]//div[1]/p[1]/text()[3]'
        ).extract_first()
        phone = response.xpath(
            'normalize-space(//div/div[3]/div[5]//div[3]/h3)'
        ).extract_first()

        museum = MuseumsItem()
        museum["museumID"] = 112
        museum["museumName"] = '重庆自然博物馆'
        museum["Location"] = address
        museum["Link"] = 'https://www.cmnh.org.cn/'
        museum["opentime"] = hours
        museum["telephone"] = phone

        # Continue on the detail page, carrying the item along in meta.
        detail_request = scrapy.Request(
            'https://www.cmnh.org.cn/about/?4.html',
            callback=self.parse_detail,
            meta={"item": museum},
        )
        yield detail_request
Exemple #15
0
    def parse(self, response):
        """Fill the item for museum 108 (Yunnan Provincial Museum).

        Opening hours and telephone are read from ``response`` and stored
        exactly as ``extract_first()`` returns them; name, address and link
        are fixed values. The detail-page URL is the site root plus the first
        matching ``href`` on the page. The item is handed to
        ``self.parse_detail`` through the request ``meta``.
        """
        item = MuseumsItem()
        item["museumID"] = 108
        item["museumName"] = '云南省博物馆'
        item["Location"] = '云南省昆明市广福路6393号'
        # The link previously contained a stray space inside the host name.
        item["Link"] = 'http://www.ynmuseum.org'
        # NOTE(review): the two queries below address what looks like the
        # same node, so both fields may receive the same text -- confirm
        # against the live page.
        item["opentime"] = response.xpath(
            "/html/body/div/div[3]/div[2]/div/div[1]/div/div[3]/div/text()"
        ).extract_first()
        item["telephone"] = response.xpath(
            "/html//div/div[3]/div[2]/div/div[1]/div/div[3]/div/text()"
        ).extract_first()
        # NOTE(review): if no href matches, extract_first() returns None and
        # this concatenation raises TypeError.
        url = 'http://www.ynmuseum.org' + response.xpath(
            "//div/div[1]/div/ul/li/div/div/div/a/@href").extract_first()

        # Continue on the detail page, carrying the item along in meta.
        yield scrapy.Request(
            url,
            callback=self.parse_detail,
            meta={"item": item}
        )
Exemple #16
0
    def parse(self, response):
        """Fill the item for museum 128 (Qinghai Provincial Museum).

        Every field is a fixed value; ``response`` is not read. The item is
        handed to ``self.parse_detail`` through the request ``meta``.
        """
        fixed_fields = (
            ("museumID", 128),
            ("museumName", '青海省博物馆'),
            ("Location", '青海省西宁市西关大街58号'),
            ("Link", 'http://www.qhmuseum.cn/'),
            ("opentime", '夏季:9:00—16:30;冬季:9:30—16:00,每周一闭馆休整。'),
            ("telephone", '0971--6118691'),
        )
        museum = MuseumsItem()
        for field_name, field_value in fixed_fields:
            museum[field_name] = field_value

        # Continue on the detail page, carrying the item along in meta.
        yield scrapy.Request(
            'https://baike.baidu.com/item/%E9%9D%92%E6%B5%B7%E7%9C%81%E5%8D%9A%E7%89%A9%E9%A6%86/1627225?fr=aladdin',
            callback=self.parse_detail,
            meta={"item": museum},
        )
Exemple #17
0
 def parse(self, response):
     """Fill the item for museum 126 (Ningxia Guyuan Museum).

     Address, opening hours and telephone are read from ``response`` via
     XPath and stripped of non-breaking spaces; name and link are fixed
     values. The item is handed to ``self.parse_detail`` through the request
     ``meta``.
     """

     def scrub(raw):
         # Remove both the literal text "\xa0" and real non-breaking spaces.
         return str(raw).replace(u'\\xa0', u'').replace(u'\xa0', u'')

     address = response.xpath(
         'normalize-space(/html//div[3]/div/div[2]/p[2]/text())'
     ).extract_first()
     hours = response.xpath(
         'normalize-space(/html//div[2]/div/div[7]/div/div/div[1]/div/div[1]/p/text()[2])'
     ).extract_first()
     phone = response.xpath(
         'normalize-space(/html//div[3]/div/div[2]/p[3]/text())'
     ).extract_first()

     museum = MuseumsItem()
     museum["museumID"] = 126
     museum["museumName"] = '宁夏固原博物馆'
     museum["Location"] = scrub(address)
     museum["Link"] = 'http://www.nxgybwg.com/'
     museum["opentime"] = scrub(hours)
     museum["telephone"] = scrub(phone)

     # Continue on the detail page, carrying the item along in meta.
     detail_request = scrapy.Request(
         'http://www.nxgybwg.com/e/action/ShowInfo.php?classid=1&id=307',
         callback=self.parse_detail,
         meta={"item": museum},
     )
     yield detail_request
Exemple #18
0
 def parse(self, response):
     """Fill the item for museum 123 (Gansu Provincial Museum).

     Only the opening hours are read from ``response``; non-breaking spaces
     in them are replaced by plain spaces. All other fields are fixed values.
     The item is handed to ``self.parse_detail`` through the request
     ``meta``.
     """
     item = MuseumsItem()
     item["museumID"] = 123
     item["museumName"] = '甘肃省博物馆'
     # The street number previously lacked its trailing "号".
     item["Location"] = '甘肃省兰州市七里河区西津西路3号'
     item["Link"] = 'http://www.gansumuseum.com/'
     # default='' matters here: a plain text() query yields None when the
     # node is missing, and str(None) used to store the literal "None".
     hours = response.xpath(
         '/html/body/div[1]/div[1]/div[2]/div/ul/li[3]/text()'
     ).extract_first(default='')
     # Swap both the literal text "\xa0" and real non-breaking spaces for
     # ordinary spaces.
     item["opentime"] = hours.replace(u'\\xa0', u' ').replace(u'\xa0', u' ')
     item["telephone"] = '0931-2346308'
     url = 'http://www.gansumuseum.com/about/show-1.html'

     # Continue on the detail page, carrying the item along in meta.
     yield scrapy.Request(
         url,
         callback=self.parse_detail,
         meta={"item": item}
     )
Exemple #19
0
 def parse(self, response):
     """Fill the item for museum 121 (Baoji Bronze Ware Museum).

     Address and telephone are read from ``response`` via XPath and have
     non-breaking spaces replaced by plain spaces; opening hours are stored
     exactly as ``extract_first()`` returns them. Name and link are fixed
     values. The item is handed to ``self.parse_detail`` through the request
     ``meta``.
     """

     def scrub(raw):
         # Swap both the literal text "\xa0" and real non-breaking spaces
         # for ordinary spaces.
         return str(raw).replace(u'\\xa0', u' ').replace(u'\xa0', u' ')

     address = response.xpath(
         'normalize-space(/html/body/div[3]/div/div[2]/text()[4])'
     ).extract_first()
     hours = response.xpath(
         '/html/body/div[1]/div[2]/div[1]/p[1]/text()[1]'
     ).extract_first()
     phone = response.xpath(
         'normalize-space(/html/body/div[1]/div[2]/div[1]/p[2])'
     ).extract_first()

     museum = MuseumsItem()
     museum["museumID"] = 121
     museum["museumName"] = '宝鸡青铜器博物院'
     museum["Location"] = scrub(address)
     museum["Link"] = 'http://www.bjqtm.com/'
     museum["opentime"] = hours
     museum["telephone"] = scrub(phone)

     # Continue on the detail page, carrying the item along in meta.
     detail_request = scrapy.Request(
         'http://www.bjqtm.com/index.php?ac=article&at=list&tid=44',
         callback=self.parse_detail,
         meta={"item": museum},
     )
     yield detail_request
Exemple #20
0
    def parse(self, response):
        """Fill the item for museum 130 (Turpan Museum).

        Every field is a fixed value (the link is left empty); ``response``
        is not read. The item is handed to ``self.parse_detail`` through the
        request ``meta``.
        """
        fixed_fields = (
            ("museumID", 130),
            ("museumName", '吐鲁番博物馆'),
            ("Location", '吐鲁番市高昌区木纳尔路1268号'),
            ("Link", ''),
            ("opentime", '周二至周日10:00-18:00'),
            ("telephone", '0995-7619644;0995-7619645;0995-7619650'),
        )
        museum = MuseumsItem()
        for field_name, field_value in fixed_fields:
            museum[field_name] = field_value

        # NOTE(review): the detail URL is on www.xabwy.com, which looks
        # unrelated to this museum -- confirm it is the intended page.
        yield scrapy.Request(
            'http://www.xabwy.com/Statics/2020.01/66.html',
            callback=self.parse_detail,
            meta={"item": museum},
        )
Exemple #21
0
    def parse(self, response):
        """Fill the item for museum 124 (Tianshui Museum).

        Every field is a fixed value; ``response`` is not read. The item is
        handed to ``self.parse_detail`` through the request ``meta``.
        """
        fixed_fields = (
            ("museumID", 124),
            ("museumName", '天水市博物馆'),
            ("Location", '甘肃省天水市秦州区伏羲路110号'),
            ("Link", 'http://www.tssbwg.com.cn/'),
            ("opentime", '每天上午8:00 - 12:00;下午 14:00 - 18:00 开放'),
            ("telephone", '0938-8291377'),
        )
        museum = MuseumsItem()
        for field_name, field_value in fixed_fields:
            museum[field_name] = field_value

        # Continue on the detail page, carrying the item along in meta.
        yield scrapy.Request(
            'http://www.tssbwg.com.cn/html/2013/zzjg_1127/218.html',
            callback=self.parse_detail,
            meta={"item": museum},
        )
Exemple #22
0
    def parse(self, response):
        """Fill the item for museum 125 (Dunhuang Academy).

        Every field is a fixed value (opening hours are left empty);
        ``response`` is not read. The item is handed to ``self.parse_detail``
        through the request ``meta``.
        """
        fixed_fields = (
            ("museumID", 125),
            ("museumName", '敦煌研究院'),
            ("Location", '甘肃省酒泉市敦煌市'),
            ("Link", 'https://www.dha.ac.cn/'),
            ("opentime", ''),
            ("telephone", '敦煌研究院网络中心 : 0937-8869123'),
        )
        museum = MuseumsItem()
        for field_name, field_value in fixed_fields:
            museum[field_name] = field_value

        # Continue on the detail page, carrying the item along in meta.
        yield scrapy.Request(
            'https://www.dha.ac.cn/',
            callback=self.parse_detail,
            meta={"item": museum},
        )
Exemple #23
0
    def parse(self, response):
        """Fill the item for museum 109 (Yunnan Nationalities Museum).

        Address and telephone are read from ``response`` via XPath and have
        non-breaking spaces replaced by plain spaces; name, link and opening
        hours are fixed values. The item is handed to ``self.parse_detail``
        through the request ``meta``.
        """

        def scrub(raw):
            # Swap both the literal text "\xa0" and real non-breaking spaces
            # for ordinary spaces.
            return str(raw).replace(u'\\xa0', u' ').replace(u'\xa0', u' ')

        item = MuseumsItem()
        item["museumID"] = 109
        # The name was previously misspelled with 名族 instead of 民族.
        item["museumName"] = '云南民族博物馆'
        item["Location"] = scrub(response.xpath(
            'normalize-space(/html//div/div/div/div/div[2]/div/div[1]/div[2]/div/div/div[2]/div/div[1]/div[2]/div/div/p[1])'
        ).extract_first())
        item["Link"] = 'http://www.ynnmuseum.com/main.html'
        item["opentime"] = '开放时间:周二至周日上午9:00——下午4:30(周一闭馆)'
        item["telephone"] = scrub(response.xpath(
            'normalize-space(/html//div/div/div/div/div[2]/div/div[1]/div[2]/div/div/div[2]/div/div[1]/div[2]/div/div/p[3])'
        ).extract_first())

        url = 'http://www.ynnmuseum.com/abouts.html'

        # Continue on the detail page, carrying the item along in meta.
        yield scrapy.Request(
            url,
            callback=self.parse_detail,
            meta={"item": item}
        )
Exemple #24
0
    def parse(self, response):
        """Fill the item for museum 129 (Xinjiang Uygur Autonomous Region Museum).

        Every field is a fixed value (the link is left empty); ``response``
        is not read. The item is handed to ``self.parse_detail`` through the
        request ``meta``.
        """
        fixed_fields = (
            ("museumID", 129),
            ("museumName", '新疆维吾尔自治区博物馆'),
            ("Location", '乌鲁木齐市沙依巴克区西北路581号'),
            ("Link", ''),
            ("opentime", '每周二至周日的10:30—18:00'),
            ("telephone", '0991-4536436'),
        )
        museum = MuseumsItem()
        for field_name, field_value in fixed_fields:
            museum[field_name] = field_value

        # NOTE(review): the detail URL is on www.xabwy.com, which looks
        # unrelated to this museum -- confirm it is the intended page.
        yield scrapy.Request(
            'http://www.xabwy.com/Statics/2020.01/66.html',
            callback=self.parse_detail,
            meta={"item": museum},
        )
Exemple #25
0
 def parse(self, response):
     """Fill the item for museum 118 (Xi'an Beilin Museum).

     Opening hours are assembled from three text nodes read from
     ``response``, with non-breaking spaces removed; all other fields are
     fixed values. The item is handed to ``self.parse_detail`` through the
     request ``meta``.
     """
     item = MuseumsItem()
     item["museumID"] = 118
     item["museumName"] = '西安碑林博物馆'
     item["Location"] = '西安三学街15号'
     item["Link"] = 'http://www.beilin-museum.com/'
     hours_queries = (
         "//table[4]//table[2]//td[3]/table[3]//div[3]/p[2]/text()[1]",
         "//table[4]//table[2]//td[3]/table[3]//div[3]/p[2]/text()[2]",
         "//table[4]//table[2]//td[3]/table[3]//div[3]/p[2]/text()[3]",
     )
     # default='' keeps a missing text node from breaking the concatenation:
     # extract_first() would otherwise return None and raise TypeError.
     hours_parts = [
         response.xpath(query).extract_first(default='')
         for query in hours_queries
     ]
     item["opentime"] = "".join(hours_parts).replace(u'\xa0', u'')
     item["telephone"] = '87210764'
     url = 'http://www.beilin-museum.com/contents/45/976.html'

     # Continue on the detail page, carrying the item along in meta.
     yield scrapy.Request(
         url,
         callback=self.parse_detail,
         meta={"item": item}
     )
Exemple #26
0
    def parse(self, response):
        """Fill the item for museum 127 (Ningxia Hui Autonomous Region Museum).

        Only the address is read from ``response``; non-breaking spaces in it
        are replaced by plain spaces. All other fields are fixed values. The
        item is handed to ``self.parse_detail`` through the request ``meta``.
        """
        address = response.xpath(
            'normalize-space(/html//footer/div/div[2]/div/div[1]/span[3])'
        ).extract_first()
        # Swap both the literal text "\xa0" and real non-breaking spaces for
        # ordinary spaces.
        address = str(address).replace(u'\\xa0', u' ').replace(u'\xa0', u' ')

        museum = MuseumsItem()
        museum["museumID"] = 127
        museum["museumName"] = '宁夏回族自治区博物馆'
        museum["Location"] = address
        museum["Link"] = 'https://www.nxbwg.com/'
        museum["opentime"] = '周一闭馆,周二-周日:9:00-16:50'
        museum["telephone"] = '电话:(0951)5085093'

        # Continue on the detail page, carrying the item along in meta.
        detail_request = scrapy.Request(
            'https://www.nxbwg.com/a/30.html',
            callback=self.parse_detail,
            meta={"item": museum},
        )
        yield detail_request