def parse(self, response):
    """Yield six hard-coded collection items for museum 19.

    ``response`` is not read; every name, image URL and introduction comes
    from the literal records below.
    """
    museum_id = 19
    # One (name, image URL, introduction) record per exhibit.
    exhibits = (
        (
            '毛泽东用过的办公桌',
            'https://bkimg.cdn.bcebos.com/pic/43a7d933c895d143afbd303d7bf082025baf07e0?x-bce-process=image/resize,m_lfit,w_220,h_220,limit_1',
            '在这张办公桌上,毛泽东同志起草了三大战役期间的许多电报手稿,起草了1948年9月政治局扩大会议的决议,撰写了《在中国共产党第七届中央委员会第二次全体会议上的报告》等许多光辉著作。',
        ),
        (
            '刘少奇用过的文件箱',
            'https://bkimg.cdn.bcebos.com/pic/d009b3de9c82d1581d2ea3af880a19d8bc3e4204?x-bce-process=image/resize,m_lfit,w_220,h_220,limit_1',
            '刘少奇同志在延安时就开始使用这个木箱,后来把它带到西柏坡,曾用它放过《论共产党员的修养》、《关于土地问题的指示》、《中国土地法大纲》等重要文件和手稿。',
        ),
        (
            '朱德用过的金属桌椅',
            'https://bkimg.cdn.bcebos.com/pic/6609c93d70cf3bc775f936f2d900baa1cd112a37?x-bce-process=image/resize,m_lfit,w_220,h_220,limit_1',
            '这套金属桌椅是在孟良固战役中缴获敌七十四师师长张灵甫的物品,朱德到前线视察时,由陈毅同志送给朱德。七届二中全会期间,陈毅、彭德怀、贺龙、邓小平等军事组的同志们就围坐在这套金属桌椅旁,讨论军事战略和作战方针。',
        ),
        (
            '董必武用过的百寿杖',
            'https://bkimg.cdn.bcebos.com/pic/c75c10385343fbf23ce75e3db87eca8064388fe1?x-bce-process=image/resize,m_lfit,w_220,h_220,limit_1',
            '这根拐杖本来是国民党将领宋席儒送给岳父六十大寿的贺礼,淮海战役时被缴获。拐杖身部错银镶嵌篆书“寿”字四行,每行25字,共100字,字型各异,故称“百寿杖”。',
        ),
        (
            '董必武用过的棕色毛毯',
            'https://bkimg.cdn.bcebos.com/pic/3ac79f3df8dcd1005e6e1a3f7a8b4710b9122f3a?x-bce-process=image/resize,m_lfit,w_220,h_220,limit_1',
            '该毛毯长230厘米,宽165厘米,四周有两条黑色条纹,上有两处破损。是延安大生产时织的1977年6月,张连英将毛毯赠给西柏坡纪念馆。1997年5月23日,这条毛毯被国家文物局革命文物鉴定组鉴定为国家一级文物。',
        ),
        (
            '毛泽东用过的转椅',
            'https://bkimg.cdn.bcebos.com/pic/ac6eddc451da81cbcf9a6dd35a66d016092431b4?x-bce-process=image/resize,m_lfit,w_220,h_220,limit_1',
            '该转椅长67.5厘米,宽58厘米,高76厘米,一腿四足,木结构圈椅式,坐面、靠背、扶手用黑色漆布包面。这把转椅是在解放后的石家庄运到西柏坡的,毛泽东就是坐在这把转椅上撰写文章、著作,批阅文件。1997年5月23日,这把皮转椅被国家文物局革命文物鉴定组鉴定为国家一级文物。',
        ),
    )
    for title, picture, blurb in exhibits:
        item = collection75Item()
        item["museumID"] = museum_id
        item['collectionName'] = title
        item['collectionImage'] = picture
        item['collectionIntroduction'] = blurb
        yield item
예제 #2
0
 def parse(self, response):
     """Yield six hard-coded collection items for museum 18.

     ``response`` is not read; the data lives in the literal records below.
     """
     museum_id = 18
     # One (name, image URL, introduction) record per exhibit.
     exhibits = (
         (
             '明赵南星书札册',
             'http://bwy.hbdjdz.com/upload_img/primary_collection/13000005-132.jpg',
             '尺寸:外径纵38.3厘米 横24.6厘米出土地.册页,木板封面,前题赵忠毅公书扎,纸本,行书,11开。',
         ),
         (
             '明万历乙未年鎗金彩漆云龙荷塘纹漆盒',
             'http://bwy.hbdjdz.com/upload_img/primary_collection/13000005-6100.jpg',
             '木胎髹漆。长方形委角,子母口,长方形圈足。盖面为红色漆地,上描黑色锦纹,锦纹内鎗金卐字,主体纹饰为二龙戏珠,花纹细部以金线勾勒。盖壁四周及盒身为黑色漆地,上填红色及鎗金花纹。纹饰为湖石花卉,荷塘水鸟。委角及口沿四周饰缠枝花卉。底足四周饰鎗金云纹。内壁及盒底髹黑漆。通身有蛇腹断纹。盒底刀刻填金款识"大明万历乙未年制"。',
         ),
         (
             '北齐青釉仰覆莲花尊',
             'http://bwy.hbdjdz.com/upload_img/primary_collection/13000005-86.jpg',
             '喇叭形口,长颈,丰肩,腹饱满,高足。莲瓣纹盖,盖顶正中有堆塑的覆莲捉手。颈肩之间安六个双泥条系。颈中部有三道凸起的弦纹,弦纹上部堆贴模印的团龙纹,下面堆贴兽面纹。肩部至底足装饰6层不同形态的莲瓣,肩部堆贴的两层双瓣覆莲,莲瓣圆润舒展;第三层莲瓣凸起的瓣尖恰在腹体中部,采用深雕技法刻出,棱角清晰锋利,每瓣的根部还加饰一片模印菩提树叶;第四层仰莲贴在下腹部;高足上的覆莲亦用深雕技法刻出。通体饰青釉,釉色青绿。这件器物形体高大,造型古朴,气魄宏伟,繁缛华丽,采用浅刻、深雕、模印、堆贴等多种装饰技法,具有很高的艺术水平,是北朝青瓷的代表性作品。',
         ),
         (
             '唐白石胁侍菩萨立像',
             'http://bwy.hbdjdz.com/upload_img/primary_collection/13000005-4153.jpg',
             '为一立形胁侍菩萨。身姿婀娜,宽肩,挺胸,细腰斜曲,上身略向右倾,腹微鼓。胸部自左肩至右胁下斜系一条帛巾,在左胸部挽成一小结。肩披自然下垂,绕腕后在膝部作两次迴环,然后向体侧飘扬。下系长裙,颈部及腿部佩有华丽的璎珞,赤足立于仰莲座上。',
         ),
         (
             '清王原祁南山图轴',
             'http://bwy.hbdjdz.com/upload_img/primary_collection/13000005-9743.jpg',
             '图轴式,纸本设色画。图中近有矾台水泊,松柏数株,山腰山角,有水阁宇舍,远有突峰漫嵌,干笔皴擦,浑厚苍润,是其杰作。右上自识“癸未嘉平为南老年道兄,五(王)之袠初度,余作南山图奉祝,偶为公事所阻。今岁往来直庐,时作时辍,日来以残腊公馀亟成之,恰值生申令辰,犹可以当补祝也。时康熙甲申腊月望后,娄东王原祁。”白文麓台朱文。',
         ),
         (
             '春秋“孟姬”铜匜',
             'http://bwy.hbdjdz.com/upload_img/primary_collection/13000005-7626.jpg',
             '匜呈椭圆形。敛口,腹微鼓,前有敞口流,后有龙形鋬,下有四蹄形足。器身上部饰阴文三角卷云纹及三角垂叶纹,纹饰洗练。鋬为龙形,龙嘴衔匜口,鼓目,竖耳,双角弯于额两侧,背刻鳞甲,背鳍与尾翼上翘。匜内底刻铭文7行38字,"惟正月初吉丁亥 蔡叔季子孙员媵孟姬有之妇沬盘。用祈囗寿,万年无疆。子子孙孙永宝用之"反映了春秋时期北方燕国与中原蔡国(今河南上蔡)互通婚嫁的密切交往。',
         ),
     )
     for title, picture, blurb in exhibits:
         item = collection75Item()
         item["museumID"] = museum_id
         item['collectionName'] = title
         item['collectionImage'] = picture
         item['collectionIntroduction'] = blurb
         yield item
예제 #3
0
 def parse(self, response):
     """Schedule a ``self.Others`` request for every link in the ``xwdt`` list.

     Each request carries its own item (museumID 82) in ``meta`` so that
     whatever the callback stores on it cannot leak into another request.
     """
     hrefs = response.xpath("//ul[@class='xwdt']/li//@href").getall()
     for href in hrefs:
         # Fresh item per request; the original handed one shared instance
         # to every callback.
         item = collection75Item()
         item['museumID'] = 82
         yield scrapy.Request('http://www.jzmsm.org' + href,
                              callback=self.Others,
                              meta={"item": item})
예제 #4
0
 def parse(self, response):
     """Yield six hard-coded collection items for museum 21.

     ``response`` is not read; the data lives in the literal records below.
     """
     museum_id = 21
     # One (name, image URL, introduction) record per exhibit.
     exhibits = (
         (
             '玛瑙串饰',
             'http://www.shanximuseum.com/Uploads/Picture/2018/12/17/s5c174e30058c2.jpg',
             '1961年侯马市上马村出土.由玉和玛瑙组成,形状有珠状、管状、片状,有的表面刻有纹饰,色泽鲜亮。',
         ),
         (
             '玉蟠龙',
             'http://www.shanximuseum.com/Uploads/Picture/2018/12/10/s5c0e3b8d1e364.jpg',
             '2005年曲沃县羊舌墓地M2出土。表面因土沁变为黄褐色。整体造型为一条蜷曲的龙纹,双线阴刻,线条流畅。',
         ),
         (
             '傅山 户外一峰',
             'http://www.shanximuseum.com/Uploads/Picture/2018/12/14/s5c13469bb29ee.jpg',
             '傅山(1607~1684年),字青竹、青主、侨黄等,别号甚多,尤以“朱衣道人”著名。山西阳曲人。清代著名学者、思想家、医学家、文学家、书画家。傅山的书法在17世纪的中国书坛独树一帜,他于真、草、篆、隶无不擅长,并超时代地开创了清代碑学之先河。他喜以篆、籀笔法作书,重骨力,书出颜真卿,并总结出“宁拙毋巧,宁丑毋媚,宁支离毋轻滑,宁真率毋安排”的经验。他的画作也达到了很高的艺术境界,所画山水、梅、兰、竹等均极精妙,被列入“逸品”。他的字、画均渗透出超逸的品格和崇高的气节,流溢着爱国主义的气息。所著颇多,可惜大都散佚,流传者有《霜红龛集》、《两汉书姓名韵》、《傅青主女科》等。',
         ),
         (
             '天曹府君天曹掌禄主算判官诸司判官等众',
             'http://www.shanximuseum.com/Uploads/Picture/2018/12/11/s5c0f55054c969.jpg',
             '纵117、横60厘米。画分两层。下层左方画两位高官,头戴展翅幞头,身着长袍,执圭,当系天曹府君和天曹掌禄。后有两官捧卷侍立,旁一短衣挎包随从。上层五人头戴垂翅幞头,穿长袍皂靴,当为判官,虽面目狰狞而意态自然。',
         ),
         (
             '释迦坐像',
             'http://www.shanximuseum.com/Uploads/Picture/2018/12/11/s5c0f5eed00cb0.jpg',
             '通高93厘米,宽45厘米。1982年山西省芮城县风陵渡出土。释迦牟尼结跏跌坐于八角形束腰须弥座上,内着袒右肩僧衣,外披袈裟,衣裙裹腿。脸型丰圆,闭目合唇,面带微笑,慈祥平和。右手残缺,左手作降魔触地印。台座下框部分刻铭文,“大唐景龙四年四月十五日弟子张敬节为七世先……帝及师僧父母法界众生同出……供养”等字。',
         ),
         (
             '曲德造佛三尊像龛',
             'http://www.shanximuseum.com/Uploads/Picture/2018/12/11/s5c0f5ad307164.jpg',
             '高33.5厘米,宽12厘米,厚9厘米。山西博物院旧藏。圆拱形尖楣龛,楣尖上方雕一佛二弟子像。佛作高肉髻,身着通肩大衣,双手合收于腹前,结跏跌坐于圆形台座上,二弟子面相长圆,双手亦合收腹前,盘坐于圆形台座之上。佛龛内主像作高肉髻,面相圆润,身着通肩广袖大衣,结跏跌坐于须弥台座上,双手施无畏与愿印。两侧胁侍菩萨头戴宝冠,面相长圆,一手置于腹前,一手置于身侧,下着贴体长裙,跣足立于台座上。台座下部浮雕双狮,并印刻造像记:“大唐麟德元年七月八日佛弟子曲德为亡妻赵敬造碑像一区……”',
         ),
     )
     for title, picture, blurb in exhibits:
         item = collection75Item()
         item["museumID"] = museum_id
         item['collectionName'] = title
         item['collectionImage'] = picture
         item['collectionIntroduction'] = blurb
         yield item
예제 #5
0
 def parse(self, response):
     """Schedule a ``self.Others`` request for every sub-menu link of the
     fourth navbar entry.

     Each request carries its own item (museumID 91) in ``meta`` so that
     whatever the callback stores on it cannot leak into another request.
     """
     hrefs = response.xpath(
         "//ul[@class='nav navbar-nav']/li[4]//li//@href").getall()
     for href in hrefs:
         # Fresh item per request; the original handed one shared instance
         # to every callback.
         item = collection75Item()
         item['museumID'] = 91
         yield scrapy.Request('https://www.gznywmuseum.org' + href,
                              callback=self.Others,
                              meta={"item": item})
    def parse(self, response):
        """Follow at most three collection entries from each ``collect-box``.

        For every followed entry an item (museumID 75) is pre-filled with the
        image URL and name, then passed to ``self.parse_detail`` via ``meta``.
        """
        site = 'http://www.chnmus.net'
        boxes = response.xpath(
            "//div[@class='view2']/div[@class='collect-box']")
        for box in boxes:
            cards = box.xpath("./div/div[@class='collect-info']")
            # Only the first three cards of each box are processed.
            for card in cards[:3]:
                item = collection75Item()
                item["museumID"] = 75
                item["collectionImage"] = site + card.xpath(
                    "./a/img/@src").extract_first()
                item["collectionName"] = card.xpath(
                    "./a/h5/text()").extract_first()
                detail_url = site + card.xpath("./a/@href").extract_first()

                # Hand the partially filled item over to the detail page.
                yield scrapy.Request(detail_url,
                                     callback=self.parse_detail,
                                     meta={"item": item})
 def parse(self, response):
     """Extract collection cells from the raw HTML with regular expressions.

     For every matching ``<td>`` cell an item (museumID 55) is filled with
     the link title and image URL, and the cell's link is requested with
     ``self.parse_detail`` as callback.
     """
     host = 'http://www.nbmuseum.cn'
     cell_pattern = r'<td  height=\'128\' align=\'center\' valign=\'top\'>(.*?)</td>'
     html = str(response.text)
     for cell in re.findall(cell_pattern, html):
         item = collection75Item()
         item["museumID"] = 55
         # Every findall result is joined, so several matches in one cell
         # end up concatenated.
         item["collectionName"] = ''.join(re.findall("<a title='(.*?)'", cell))
         item["collectionImage"] = host + ''.join(
             re.findall("<img src='(.*?)'", cell))
         detail_url = host + ''.join(re.findall("href='(.*?)'", cell))
         yield scrapy.Request(detail_url,
                              callback=self.parse_detail,
                              meta={"item": item})
예제 #8
0
    def parse(self, response):
        """Request the detail page of every product on a listing page.

        A deep copy of the item (museumID 29 plus the image URL) travels to
        ``self.parse_detail`` in ``meta``. Afterwards, while ``self.page`` is
        at most 1, the counter is incremented and the next listing page built
        from ``self.base_url`` is requested.
        """
        site = 'http://www.sypm.org.cn'
        link_path = "./div[@class='pic-module']/div[@class='pic']/a"

        item = collection75Item()
        item["museumID"] = 29
        entries = response.xpath(
            "//ul[@class='mainul productlist-02']//li[@class='content column-num4']"
        )
        for entry in entries:
            image_src = entry.xpath(link_path + "/img/@src").extract_first().strip()
            item["collectionImage"] = site + image_src
            detail_href = entry.xpath(link_path + "/@href").extract_first().strip()

            # The copy keeps later iterations from overwriting this request's image.
            yield scrapy.Request(site + detail_href,
                                 callback=self.parse_detail,
                                 meta={'item': copy.deepcopy(item)})

        # Move on to the next listing page once this one is done.
        if self.page <= 1:
            self.page += 1
            yield scrapy.Request(url=self.base_url.format(self.page),
                                 callback=self.parse)
예제 #9
0
 def parse(self, response):
     """Yield six hard-coded collection items for museum 22.

     ``response`` is not read; the data lives in the literal records below.
     """
     museum_id = 22
     # One (name, image URL, introduction) record per exhibit.
     # NOTE(review): the introduction stored with '薄片透光' describes 焦煤 and
     # the one stored with '焦煤' describes 肥煤. The pairing is kept exactly
     # as it was; confirm against the source site.
     exhibits = (
         (
             '瘦煤',
             'http://www.coalmus.org.cn/UploadFiles/2016-01/20074231150894352.jpg',
             '瘦煤是炼焦用煤之中配煤,性能与焦煤相近。瘦煤焦炭块度大、裂纹少,但熔融性和耐磨性差,其用途除作炼焦配煤外,还可以用于造气、发电和其他动力用煤。',
         ),
         (
             '贫煤',
             'http://www.coalmus.org.cn/UploadFiles/2016-01/200742317485376070.jpg',
             '贫煤是变质程度最高的烟煤,无黏结性。燃烧时火焰短,延续时间长。主要用作动力煤,也可造气,用作合成氨原料和气体燃料。',
         ),
         (
             '弱黏结煤',
             'http://www.coalmus.org.cn/UploadFiles/2016-01/200742311525667786.jpg',
             '弱黏结煤是炼焦煤与非炼焦煤之间的过渡煤种,主要用作造气、燃料和配焦。低硫、低灰、低磷的弱黏结煤,是全国最主要的优质动力煤。',
         ),
         (
             '薄片透光',
             'http://www.coalmus.org.cn/UploadFiles/2016-01/200742022484179650.jpg',
             '焦煤是炼焦用煤中的主焦煤,变质程度中等,结焦性和黏结性最佳。利用焦煤炼焦,可得到焦炭、焦油和焦炉气。焦炭除供给)台炼外,还可造气和电石。而焦油和焦炉气可作为燃料,还能提炼数十种化工产品。',
         ),
         (
             '焦煤',
             'http://www.coalmus.org.cn/UploadFiles/2016-01/200742022455763503.jpg',
             '肥煤是炼焦用煤的一种,用肥煤炼出的焦炭横裂纹多,焦根部蜂焦多,易碎,但肥煤的黏结力很强,能与黏结力弱的煤搭配后炼出优质焦煤,故称肥煤为”配焦煤之母”。',
         ),
         (
             '长焰煤',
             'http://www.coalmus.org.cn/UploadFiles/2016-01/200742317422175699.jpg',
             '长焰煤是变质程度最低的煤,无黏结性和结焦性。主要用作燃料。经低温干馏可制半焦、煤气、焦油,造气后可制合成氨等。',
         ),
     )
     for title, picture, blurb in exhibits:
         item = collection75Item()
         item["museumID"] = museum_id
         item['collectionName'] = title
         item['collectionImage'] = picture
         item['collectionIntroduction'] = blurb
         yield item
예제 #10
0
 def parse(self, response):
     """Yield seven hard-coded collection items for museum 26.

     ``response`` is not read. The data is kept as one record per exhibit so
     the number of items follows from the data itself; the original carried
     an extra empty name that no image or introduction matched and relied on
     a hard-coded ``range(7)`` to skip it.
     """
     museum_id = 26
     # One (name, image URL, introduction) record per exhibit.
     exhibits = (
         (
             '燕王职戈',
             'http://www.lnmuseum.com.cn/UpLoadFile/image/20141104/2014110414290218218.jpg',
             '形体大,中部有隆起的脊,脊旁有凹形血槽,胡作三弧线,阑内三穿,直内一穿,内铸虎纹,胡上铸铭文:“郾王职乍御司马”。为燕王作,为其御司寇所用之兵器。器精美而文字史料价值极高。',
         ),
         (
             '迦陵频伽纹镜',
             'http://www.lnmuseum.com.cn/UpLoadFile/image/20141104/20141104142437723772.jpg',
             '镜子呈圆形,镜心有破孔,背面铸突起的迦陵频伽纹,主纹空隙填涂黑漆,使花纹生动突出。迦陵频伽为梵语,译为美妙声音,汉译多做“妙音鸟”常见于佛教雕刻,一般作人首鸟身形象。佛经说迦陵是仙鸟,在卵壳中,鸣音已压众鸟,所以说佛法之音与之相似。辽代用此图纹作镜,可见契丹人深信佛教的程度。',
         ),
         (
             '耀州窑青瓷飞鱼形水盂',
             'http://www.lnmuseum.com.cn/UpLoadFile/image/maxpic201312/20131217111656.jpg',
             '整体造型设计成龙鱼形,上颚向上翻卷,双翅高振呈飞翔状,鱼尾商桥,呈“U”字形,器内隔成前后两室,底置小圈足。白瓷胎,胎质细腻坚硬。内外施满青釉,釉色润泽晶莹、青翠欲滴,足根无釉。',
         ),
         (
             '彩釉塑贴云龙纹三足罐',
             'http://www.lnmuseum.com.cn/UpLoadFile/image/20141013/20141013130286468646.jpg',
             '高领外侈、敞口,球形腹,下承三兽形足,有窑沾。腹部中央装饰一周凸弦纹,淋釉,器底部无釉露胎,胎质细腻。上腹部贴塑三个动物纹饰,腹部正中贴塑舞狮。器型饱满,线条柔和。',
         ),
         (
             '铜鎏金大威德金刚',
             'http://www.lnmuseum.com.cn/UpLoadFile/image/20141104/20141104112386758675.jpg',
             '此尊为九头三十四臂,九头分三层排列,正面为牛头,牛角粗大,血盆大口,头戴五骷髅冠;最上一头,为如来相,象征着他是阿弥陀佛化身而来。最下面七头,头顶红发上竖,象征忿怒。上身饰璎珞,下身围虎皮,顶挂五十人头骨串。主二臂抱明妃,其余手伸向两边,诸手皆持法器,铃、杵、刀、剑、弓、箭、瓶、索子、钩、戟、伞、盖、骷髅等兵器,各有寓意。有十六条腿,皆左展姿站立,左右脚分别踩八种人、兽和禽。明妃罗浪杂娃坐在主尊怀中,右手持月刀,左手持人心,左腿勾在主尊腰间,右脚踩飞禽。下为单层覆莲座。此尊形象复杂,面目手足众多,但布局简洁明了,线条一丝不苟。台座上刻“大清乾隆御制”楷书款。',
         ),
         (
             '木雕罗汉像',
             'http://www.lnmuseum.com.cn/UpLoadFile/image/maxpic201312/20131217142103.jpg',
             '此尊呈比丘相,圆睁双目,面相威严,棱角分明。内穿交领僧衣,外穿通肩式袈裟,以丝带束于腰间。衣纹厚重流畅,裙下摆自然搭于台座上。左手结法印,右手置腿上抓大衣一角,极其出色地突出了布料的质感。结跏趺坐姿。台座为岩石状高台座。罗汉是佛教造像的主要题材之一,形象仿照现实生活中的僧人特点,以印度僧人形象为多。光头,无肉髻,身披袈裟或大领僧衣。相貌不一,手法或夸张或写实,神韵生动。',
         ),
         (
             '王蒙太白山图',
             'http://www.lnmuseum.com.cn/UpLoadFile/image/20141105/20141105104212461246.jpg',
             '绘浙江鄞县太白山天童寺及其周围景物,尤其着重描绘天童寺前二十里夹径松林。画面中远岫层峦,溪流潺潺,人物往还其间,用笔松润,赋色淡雅。画心右上角有小字篆书“太白山图”四字,画尾钤“王蒙印”,近人研究认为此卷为王蒙晚年代表作。',
         ),
     )
     for title, picture, blurb in exhibits:
         item = collection75Item()
         item["museumID"] = museum_id
         item['collectionName'] = title
         item['collectionImage'] = picture
         item['collectionIntroduction'] = blurb
         yield item
 def parse(self, response):
     """Yield six hard-coded collection items for museum 23.

     ``response`` is not read. For the first three exhibits the introduction
     is the same text as the name.
     """
     museum_id = 23
     # One (name, image URL, introduction) record per exhibit.
     exhibits = (
         (
             '抗战时期孙继先使用过的狗皮褥子',
             'http://www.balujun.cn/uploadfile/2010/0519/20100519044802417.jpg',
             '抗战时期孙继先使用过的狗皮褥子',
         ),
         (
             '反法西斯联盟国国旗',
             'http://www.balujun.cn/uploadfile/2010/0901/20100901093019886.jpg',
             '反法西斯联盟国国旗',
         ),
         (
             '抗战时期聂荣臻送给刘显宜的行军床',
             'http://www.balujun.cn/uploadfile/2010/0901/20100901093143182.jpg',
             '抗战时期聂荣臻送给刘显宜的行军床',
         ),
         (
             '山西牺盟会会员证章',
             'http://www.balujun.cn/uploadfile/2010/0901/20100901094315938.jpg',
             '山西牺牲救国同盟会证章:直径2.5厘米,铜质,圆形,土黄色地上绘中国地图,其中东三省用蓝色表示,其余为深绿色。上镌“牺牲救国”4个大字。',
         ),
         (
             '冀南银行行徽',
             'http://www.balujun.cn/uploadfile/2010/0901/20100901095239126.jpg',
             '冀南银行行徽:纵3.3,横3,铜质。盾形,周围双凸线框边,孔雀蓝填色,中间金色圆圈内双钩金色线填深烤蓝色英文字母:“CHB”,圈上下分别为“晋冀鲁豫边区”和“冀南银行”楷书金色阳文。',
         ),
         (
             '白晋北段破击战中民兵用过的铁扳手',
             'http://www.balujun.cn/uploadfile/2010/0901/20100901095422667.jpg',
             '白(圭)晋(城)北段破击战中民兵用过的铁扳手为铁质,黑色,长49,扳手宽13.2。',
         ),
     )
     for title, picture, blurb in exhibits:
         item = collection75Item()
         item["museumID"] = museum_id
         item['collectionName'] = title
         item['collectionImage'] = picture
         item['collectionIntroduction'] = blurb
         yield item
 def parse(self, response):
     """Build one item (museumID 14) from the table on the page in ``response``.

     The name is the second text node among the centre-aligned cells, the
     image is the ``<img>`` inside the 440-wide cell, and the introduction is
     the text of the styled cell in the third table row.
     """
     table = "//div[@class='bodybg']/div[@class='w1200']/table"
     item = collection75Item()
     item["museumID"] = 14
     item['collectionName'] = response.xpath(
         table + "/tr/td[@align='center']/text()").extract()[1]
     # The host needs an explicit scheme; without it the stored value is not
     # an absolute URL.
     item['collectionImage'] = 'http://www.pgm.org.cn' + response.xpath(
         table + "/tr/td[@width='440']/img/@src").extract_first()
     rows = response.xpath(table + "/tr")
     item['collectionIntroduction'] = rows[2].xpath(
         "./td[@style='width:40%;height:300px']/text()").extract_first()
     yield item
예제 #13
0
 def parse(self, response):
     """Schedule a ``self.Others`` request for every link under ``#r_ww``.

     Each request carries its own item (museumID 79) in ``meta`` so that
     whatever the callback stores on it cannot leak into another request.
     """
     hrefs = response.xpath('//*[@id="r_ww"]/a/@href').extract()
     for href in hrefs:
         # Fresh item per request; the original handed one shared instance
         # to every callback.
         item = collection75Item()
         item['museumID'] = 79
         yield scrapy.Request('http://www.kfsbwg.com' + href,
                              callback=self.Others,
                              meta={"item": item})
 def parse(self, response):
     """Schedule a ``self.Others`` request for every link in ``#caseListDIV``.

     Each request carries its own item (museumID 85) in ``meta`` so that
     whatever the callback stores on it cannot leak into another request.
     """
     hrefs = response.xpath("//div[@id='caseListDIV']/div//@href").getall()
     for href in hrefs:
         # Fresh item per request; the original handed one shared instance
         # to every callback.
         item = collection75Item()
         item['museumID'] = 85
         yield scrapy.Request('http://www.zhongshanwarship.org.cn/' + href,
                              callback=self.Others,
                              meta={"item": item})
예제 #15
0
 def parse(self, response):
     """Schedule a ``self.Others`` request for each ``<dl>`` in ``videoli``.

     The first ``href`` found under the entry's ``<dd>`` is requested.
     Entries without one are skipped instead of raising ``TypeError`` on the
     string concatenation. Each request carries its own item (museumID 97)
     in ``meta``.
     """
     for entry in response.xpath("//div[@class='videoli']/dl"):
         href = entry.xpath("./dd//@href").get()
         if href is None:
             continue
         # Fresh item per request; the original handed one shared instance
         # to every callback.
         item = collection75Item()
         item['museumID'] = 97
         yield scrapy.Request('http://www.amgx.org/' + href,
                              callback=self.Others,
                              meta={"item": item})
예제 #16
0
 def parse(self, response):
     """Schedule a ``self.Others`` request for every link in ``#p_item``.

     Each request carries its own item (museumID 87) in ``meta`` so that
     whatever the callback stores on it cannot leak into another request.
     """
     hrefs = response.xpath("//div[@id='p_item']//div/a/@href").getall()
     for href in hrefs:
         # Fresh item per request; the original handed one shared instance
         # to every callback.
         item = collection75Item()
         item['museumID'] = 87
         yield scrapy.Request('http://www.ssmzd.com' + href,
                              callback=self.Others,
                              meta={"item": item})
예제 #17
0
 def parse(self, response):
     """Yield a single placeholder item for museum 99.

     ``response`` is not read; name, introduction and image are all set to
     the same placeholder text.
     """
     placeholder = '暂无'
     item = collection75Item()
     item['museumID'] = 99
     for field in ('collectionName', 'collectionIntroduction',
                   'collectionImage'):
         item[field] = placeholder
     yield item
 def parse(self, response):
     """Schedule a ``self.Others`` request for every link in ``#thumbnailUL``.

     Requests are sent with ``dont_filter=True``. Each one carries its own
     item (museumID 86) in ``meta`` so that whatever the callback stores on
     it cannot leak into another request.
     """
     hrefs = response.xpath("//ul[@id='thumbnailUL']/li//a/@href").getall()
     for href in hrefs:
         # Fresh item per request; the original handed one shared instance
         # to every callback.
         item = collection75Item()
         item['museumID'] = 86
         yield scrapy.Request('http://61.187.53.122/' + href,
                              callback=self.Others,
                              meta={"item": item},
                              dont_filter=True)
예제 #19
0
 def parse(self, response):
     """Schedule a ``self.Others`` request for every link in the type list.

     Each request carries its own item (museumID 89) in ``meta`` so that
     whatever the callback stores on it cannot leak into another request.
     """
     hrefs = response.xpath(
         "//ul[@class='typelist normalfont']/li//@href").getall()
     for href in hrefs:
         # Fresh item per request; the original handed one shared instance
         # to every callback.
         item = collection75Item()
         item['museumID'] = 89
         yield scrapy.Request('http://www.chinajiandu.cn' + href,
                              callback=self.Others,
                              meta={"item": item})
예제 #20
0
 def parse(self, response):
     """Schedule ``self.Others`` requests for the ``Collection`` menu links.

     Only hrefs from the ``secondmenu`` list that contain ``'Collection'``
     are followed. Each request carries its own item (museumID 84) in
     ``meta`` so that whatever the callback stores on it cannot leak into
     another request.
     """
     hrefs = response.xpath("//ul[@class='secondmenu']//li//@href").getall()
     for href in hrefs:
         if 'Collection' not in href:
             continue
         # Fresh item per request; the original handed one shared instance
         # to every callback.
         item = collection75Item()
         item['museumID'] = 84
         yield scrapy.Request('http://www.1911museum.com' + href,
                              callback=self.Others,
                              meta={"item": item})
예제 #21
0
 def parse(self, response):
     """Yield one item (museumID 67) per entry of the ``list_content`` list.

     Name and image come from the link in the entry's first ``<div>``; the
     introduction is left as an empty string.
     """
     site = 'http://www.81-china.com'
     for entry in response.xpath("//div[@class='list_content']/ul/li"):
         title = entry.xpath("./div[1]/a/@title").extract_first()
         image_src = entry.xpath("./div[1]/a/img/@src").extract_first()

         item = collection75Item()
         item["museumID"] = 67
         item["collectionName"] = title
         item["collectionImage"] = site + image_src
         item["collectionIntroduction"] = ''
         yield item
예제 #22
0
 def parse(self, response):
     """Build one item (museumID 11) from a single exhibit page.

     The name is the ``alt`` text of the exhibit image, the image URL is a
     fixed prefix plus the ``src`` with its first character dropped, and the
     introduction is every ``<p>`` text node under ``wwms`` concatenated.
     """
     image_node = ("//div[@class='cj_ercom_wai cj_huawenbg1']"
                   "/div[@class='cj_baici_ma']/div[@class='cj_dycp_lef']/a/img")
     item = collection75Item()
     item["museumID"] = 11
     item['collectionName'] = response.xpath(image_node + "/@alt").extract_first()
     image_src = response.xpath(image_node + "/@src").extract_first()
     item['collectionImage'] = 'http://www.chnmuseum.cn/zp/zpml/201812' + image_src[1:]
     paragraphs = response.xpath("//div[@class='wwms']/p/text()").extract()
     item['collectionIntroduction'] = "".join(paragraphs)
     yield item
예제 #23
0
 def parse(self, response):
     """Yield one item (museumID 9) per ``dc_item`` entry on the page.

     The name is the text of the entry's ``dc_title`` element; the image and
     the introduction are fixed placeholder values.
     """
     entries = response.xpath("//div[@class='dc_list']/div[@class='dc_item']")
     for entry in entries:
         item = collection75Item()
         item["museumID"] = 9
         # The original expression ended in a stray ']' and was not valid XPath.
         item['collectionName'] = entry.xpath(
             "./div[@class='dc_right_con']/div[@class='dc_title']/text()"
         ).extract_first()
         item['collectionImage'] = 'http://www.1937china.com/kzjng/views'
         item['collectionIntroduction'] = "暂无介绍"
         yield item
예제 #24
0
 def parse(self, response):
     """Schedule a ``self.Others`` request for every matched ``wrap`` link.

     Requests are sent with ``dont_filter=True``. Each one carries its own
     item (museumID 93) in ``meta`` so that whatever the callback stores on
     it cannot leak into another request.
     """
     hrefs = response.xpath(
         "//div[@class='wrap']/div//div[2]//a/@href").getall()
     for href in hrefs:
         # Fresh item per request; the original handed one shared instance
         # to every callback.
         item = collection75Item()
         item['museumID'] = 93
         yield scrapy.Request('https://www.shenzhenmuseum.com' + href,
                              callback=self.Others,
                              meta={"item": item},
                              dont_filter=True)
 def parse(self, response):
     """Request the detail page of every entry in the ``clist`` listing.

     A new item (museumID 3) is passed to ``self.parse_detail`` through
     ``meta`` for each entry.
     """
     site = 'http://www.gmc.org.cn'
     for entry in response.xpath("//div[@class='clist clear']/div[@class='li']"):
         item = collection75Item()
         item["museumID"] = 3
         detail_url = site + entry.xpath("./a/@href").extract_first()
         # The item travels to the detail callback inside meta.
         yield scrapy.Request(detail_url,
                              callback=self.parse_detail,
                              meta={"item": item})
 def parse(self, response):
     """Yield one item (museumID 8) per ``col-sm-4`` thumbnail on the page.

     The name is the thumbnail's ``alt`` text, the image URL is the site
     host plus the thumbnail's ``src``, and the introduction is a fixed
     placeholder.
     """
     columns = response.xpath("//div[@class='col-sm-4']")
     for column in columns:
         item = collection75Item()
         item["museumID"] = 8
         item['collectionName'] = column.xpath(
             "./div[@class='thumbnail']/a/img/@alt").extract_first()
         # The host needs an explicit scheme; without it the stored value is
         # not an absolute URL.
         item['collectionImage'] = 'http://www.bmnh.org.cn' + column.xpath(
             "./div[@class='thumbnail']/a/img/@src").extract_first()
         item['collectionIntroduction'] = "暂无介绍"
         yield item
 def parse(self, response):
     """Request the detail page of every entry in the ``raAppList`` list.

     The entry's ``href`` is used as the request URL unchanged; a new item
     (museumID 4) is passed to ``self.parse_detail`` through ``meta``.
     """
     entries = response.xpath(
         "//div[@class='relicAppRight']/div[@class='raAppList']/ul/li")
     for entry in entries:
         item = collection75Item()
         item["museumID"] = 4
         detail_url = entry.xpath("./a/@href").extract_first()
         # The item travels to the detail callback inside meta.
         yield scrapy.Request(detail_url,
                              callback=self.parse_detail,
                              meta={"item": item})
예제 #28
0
 def parse(self, response):
     """Request the detail page of every entry in the ``mainbar_pic_nr`` list.

     Each item (museumID 66) is pre-filled with the link title and the image
     ``src`` before being passed to ``self.parse_detail`` through ``meta``.
     """
     for entry in response.xpath("//div[@class='mainbar_pic_nr']/ul/li"):
         link = "./a"
         item = collection75Item()
         item["museumID"] = 66
         item["collectionName"] = entry.xpath(link + "/@title").extract_first()
         item["collectionImage"] = entry.xpath(link + "/img/@src").extract_first()
         detail_url = entry.xpath(link + "/@href").extract_first()
         yield scrapy.Request(detail_url,
                              callback=self.parse_detail,
                              meta={"item": item})
 def parse(self, response):
     """Schedule a ``self.Others`` request for the first ``col-sort`` entry's links.

     Each request carries its own item (museumID 83) in ``meta`` so that
     whatever the callback stores on it cannot leak into another request.
     The leftover debug ``print`` calls were removed.
     """
     hrefs = response.xpath(
         "//div[@class='col-sort']/ul/li[1]/a/@href").getall()
     for href in hrefs:
         # Fresh item per request; the original handed one shared instance
         # to every callback.
         item = collection75Item()
         item['museumID'] = 83
         yield scrapy.Request('http://www.whmuseum.com.cn' + href,
                              callback=self.Others,
                              meta={"item": item})
 def parse(self, response):
     """Request the detail page of every ``<dl>`` in ``content_shougao``.

     A new item (museumID 6) is passed to ``self.parse_detail`` through
     ``meta`` for each entry.
     """
     site = 'http://www.luxunmuseum.com.cn'
     for entry in response.xpath("//div[@class='content_shougao']/dl"):
         item = collection75Item()
         item["museumID"] = 6
         detail_url = site + entry.xpath("./dd/a/@href").extract_first()
         # The item travels to the detail callback inside meta.
         yield scrapy.Request(detail_url,
                              callback=self.parse_detail,
                              meta={"item": item})