Beispiel #1
0
 def parse_detail(self, response):
     """Build a ``DesignItem`` from an article page and yield it.

     Collects the first image under ``div.artcon``, the link texts under
     ``#loat6`` (comma-joined into one tag string), the ``<h1>`` title and
     the article text with all whitespace removed (capped at 480
     characters). Finally every entry of ``data`` is copied onto the item;
     ``data`` is not defined in this block and must exist in the enclosing
     scope.

     Raises:
         IndexError: if no image or no ``<h1>`` text is found.
     """
     item = DesignItem()
     url = response.url
     img_url = response.xpath(
         '//div[@class="artcon"]//img/@src').extract()[0]
     tags = response.xpath('//div[@id="loat6"]/a/text()').extract()
     replacement = response.xpath(
         '//div[@class="zuozhe1"]/a/text()').extract()
     # The first tag is swapped for the ``div.zuozhe1`` link text, or dropped
     # when that text is missing. Guarding on ``tags`` avoids the IndexError
     # that the previous ``tags.pop(0)`` fallback raised on an empty list.
     if tags:
         if replacement:
             tags[0] = replacement[0]
         else:
             tags.pop(0)
     tags = ','.join(tags)
     title = response.xpath('//h1/text()').extract()[0]
     text_nodes = response.xpath(
         '//div[@class="articlebox"]/div[@class="artcon"]//text()').extract()
     # Remove every whitespace character, then keep at most 480 characters.
     remark = ''.join(''.join(node.split()) for node in text_nodes)[:480]
     item['tags'] = tags
     item['title'] = title.strip()
     item['remark'] = remark
     item['url'] = url
     item['img_url'] = img_url
     print(item)
     for key, value in data.items():
         item[key] = value
     yield item
 def parse(self, response):
     """Yield one detail request per listed article, then the next list page.

     For every ``<article>`` under ``div.dboom-container-block`` a
     ``DesignItem`` is filled with title, excerpt, URL and the fixed tag
     ``'design'`` and attached to the detail request via ``meta['item']``.
     While ``self.page`` is below 500 it is incremented and the following
     list page is requested with this method as callback.

     Raises:
         IndexError: if an article has no title link text or href.
     """
     detail_list = response.xpath(
         '//div[@class="dboom-container-block"]//article')
     for article in detail_list:
         item = DesignItem()
         title = article.xpath(
             './h3[@class="dboom-title"]/a/text()').extract()[0]
         url = article.xpath(
             './h3[@class="dboom-title"]/a/@href').extract()[0]
         # The excerpt is optional: fall back to an empty string instead of
         # hiding every possible error behind a bare ``except``.
         excerpt = article.xpath(
             './p[@class="dboom-excerpt flip-other"]/text()').extract()
         remark = excerpt[0] if excerpt else ''
         item['title'] = title
         item['remark'] = remark
         item['url'] = url
         item['tags'] = 'design'
         yield scrapy.Request(url,
                              callback=self.parse_detail,
                              meta={'item': item})
     if self.page < 500:
         print(self.page)
         self.page += 1
         yield scrapy.Request('https://www.designboom.com/design/page' +
                              str(self.page) + '/',
                              callback=self.parse)
Beispiel #3
0
 def parse(self, response):
     """Yield one ``DesignItem`` per JSON entry, then request the next page.

     The response body is decoded as JSON and its ``'data'`` list is walked.
     Each entry contributes title, tags, URL and image URL; the entries of
     ``data`` (a mapping defined outside this block) are copied onto the
     item and the raw entry is stored under ``'info'``. While ``self.p`` is
     below 9 it is incremented and the next page is requested by form POST.
     """
     payload = json.loads(response.body)
     for entry in payload['data']:
         item = DesignItem()
         title = entry['title']
         try:
             # Every category name keeps its trailing comma, as before.
             tags = ''.join(
                 cate['name'] + ',' for cate in entry['showcateList'])
         except (KeyError, TypeError):
             # Missing or malformed category list: no tags at all.
             tags = ''
         img_url = 'http://www.perdesigncn.com' + entry['litpic']
         url = 'http://www.perdesigncn.com/Home/info/' + entry['id']
         item['title'] = title
         item['tags'] = tags
         item['url'] = url
         item['img_url'] = img_url
         for key, value in data.items():
             item[key] = value
         item['info'] = entry
         yield item
     if self.p < 9:
         self.p += 1
         try:
             yield scrapy.FormRequest(url=self.url,
                                      formdata={'p': str(self.p)},
                                      callback=self.parse)
         except Exception:
             # ``Exception`` rather than a bare ``except`` so GeneratorExit
             # (raised at the ``yield`` when the generator is closed) is no
             # longer swallowed.
             print(self.p, '*' * 50)
Beispiel #4
0
 def parse_detail(self, response):
     """Build a ``DesignItem`` for one awarded project and yield it.

     ``prize_level`` is taken from ``response.meta``; year, discipline tags,
     designer, optional company, title, first figure image and the first
     description paragraph are read from the page. The description has
     newlines, spaces and carriage returns removed and is capped at 450
     characters. Entries of ``data`` (defined outside this block) are copied
     onto the item last.

     Raises:
         KeyError: if ``response.meta`` has no ``'prize_level'``.
         IndexError: if a required node (year, designer, title, image,
             description) is missing.
     """
     item = DesignItem()
     prize_level = response.meta['prize_level']
     prize_time = response.xpath('//li[@class="project-year project-term"]/h4/text()').extract()[0]
     # Drop the single-space text nodes in one pass instead of the quadratic
     # count()/remove() loop.
     tags = [
         tag for tag in
         response.xpath('//li[@class="project-discipline project-term"]//div/text()').extract()
         if tag != ' '
     ]
     designer = response.xpath('//div[@class="columns project-details"]/div[2]//li/text()').extract()[0]
     # The company paragraph is optional; default to an empty string.
     company_texts = response.xpath('//div[@class="columns project-details"]/div[3]/div/p/text()').extract()
     company = company_texts[0] if company_texts else ''
     title = response.xpath('//h1/text()').extract()[0]
     img_url = response.xpath('//div[@class="project-main-content__inner-wrapper"]/figure[1]/img/@src').extract()[0]
     if not img_url.startswith('https://good-design.org'):
         img_url = 'https://good-design.org' + img_url
     remark = response.xpath('//div[@class="project-description"]/p[1]/text()').extract()[0]
     remark = remark.replace('\n', '').replace(' ', '').replace('\r', '').strip()
     remark = remark[:450]
     item['prize_level'] = prize_level
     item['prize_time'] = prize_time
     item['tags'] = tags
     item['designer'] = designer
     item['company'] = company
     item['title'] = title
     item['img_url'] = img_url
     item['remark'] = remark
     for key, value in data.items():
         item[key] = value
     yield item
Beispiel #5
0
 def parse_detail(self, response):
     """Build a ``DesignItem`` for one case page and yield it.

     Reads the active navigation entry as the tag, the first image inside
     ``div.view_content`` (prefixed with the site root when it does not
     start with ``http``), an optional remark and the ``p.case_title`` text.
     Entries of ``data`` (defined outside this block) are copied onto the
     item last.

     Raises:
         IndexError: if the tag, image or title node is missing.
     """
     item = DesignItem()
     url = response.url
     tags = response.xpath(
         '//li[contains(@class," li_active1")]/a/text()').extract()[0]
     img_url = response.xpath(
         '//div[@class="view_content"]/p/img/@src').extract()[0]
     if not img_url.startswith('http'):
         img_url = 'http://www.designdo.cn' + img_url
     # The remark is optional; an explicit empty check replaces the bare
     # ``except`` that used to hide every kind of error.
     remark_texts = response.xpath(
         '/html/body/div[9]/div[1]/table/tr/td[1]/p[2]/span/text()'
     ).extract()
     remark = remark_texts[0] if remark_texts else ''
     title = response.xpath('//p[@class="case_title"]/text()').extract()[0]
     item['title'] = title
     item['remark'] = remark
     item['img_url'] = img_url
     item['url'] = url
     item['tags'] = tags
     for key, value in data.items():
         item[key] = value
     yield item
Beispiel #6
0
 def parse_detail(self, response):
     """Yield a ``DesignItem`` describing one product page.

     The tag comes from ``self.category`` keyed by the current entry of
     ``self.category_list``; image, banner text (whitespace removed, at most
     500 characters) and ``<h1>`` title come from the page. Entries of
     ``data`` (defined outside this block) are applied afterwards.
     """
     item = DesignItem()
     print(self.category_index)
     category_key = self.category_list[self.category_index]
     category_label = self.category[category_key]
     banner_src = response.xpath(
         '//div[@class="product-banner product-banner-video"]/img/@src'
     ).extract()[0]
     if banner_src.startswith('http'):
         picture = banner_src
     else:
         picture = 'http://www.lkkdesign.com' + banner_src
     pieces = response.xpath(
         '//div[@class="product-banner-txt"]//text()').extract()
     # Squeeze out all whitespace from every text node before joining.
     summary = ''.join(''.join(piece.split()) for piece in pieces)[:500]
     heading = response.xpath('//h1/text()').extract()[0]
     fields = {
         'title': heading,
         'remark': summary,
         'img_url': picture,
         'url': response.url,
         'tags': category_label,
     }
     for name, content in fields.items():
         item[name] = content
     for key, value in data.items():
         item[key] = value
     yield item
Beispiel #7
0
 def parse_detail(self, response):
     """Yield a ``DesignItem`` for one award entry page.

     Prize level and company are the second text node of the first and
     second ``ul.eparams`` list items; the thumbnail URL is rewritten from
     the ``c200x200`` variant to ``a768``; the description has all
     whitespace removed and is limited to 480 characters. ``self.year`` is
     stored as the prize time and entries of ``data`` (defined outside this
     block) are applied afterwards.
     """
     item = DesignItem()
     level = response.xpath(
         '//ul[@class="eparams"]/li[1]/text()').extract()[1].strip()
     year = self.year
     page_url = response.url
     picture = response.xpath(
         '//div[@class="eimglist"]/a[1]/img/@src').extract()[0]
     picture = picture.replace('c200x200', 'a768')
     if not picture.startswith('http'):
         picture = 'http://cdn.di-award.org' + picture
     fragments = response.xpath('//p[@class="econtent"]//text()').extract()
     description = ''.join(''.join(part.split()) for part in fragments)
     heading = response.xpath('//h3/text()').extract()[0]
     maker = response.xpath(
         '//ul[@class="eparams"]/li[2]/text()').extract()[1].strip()
     fields = (
         ('title', heading),
         ('remark', description[:480]),
         ('url', page_url),
         ('img_url', picture),
         ('company', maker),
         ('prize_level', level),
         ('prize_time', year),
     )
     for name, content in fields:
         item[name] = content
     for key, value in data.items():
         item[key] = value
     yield item
Beispiel #8
0
    def parse(self, response):
        """Yield a detail request per ``div.post``, then the next list page.

        Each post contributes URL, title, full-size image URL (the
        ``-326x246`` thumbnail suffix is removed) and its category texts.
        Entries of ``data`` (defined outside this block) are copied onto the
        item, which travels to ``parse_detail`` via ``meta['item']``. While
        ``self.page`` is below 30 it is incremented and the next list page is
        requested.

        Raises:
            IndexError: if a post lacks a link, title text or image.
        """
        for post in response.xpath('//div[@class="post"]'):
            item = DesignItem()
            url = post.xpath('./div[1]/a/@href').extract()[0]
            title = post.xpath('./div[2]/h2//text()').extract()[0]
            img_url = post.xpath('./div[1]/a/img/@src').extract()[0]
            # Filter out the bare separator text nodes in one pass. The old
            # count()/remove() loops were quadratic and rebound the outer
            # loop variable ``i``.
            tags = [
                text for text in
                post.xpath('.//div[@class="category"]//text()').extract()
                if text not in (' ', ', ')
            ]
            img_url = img_url.replace('-326x246', '')
            item['tags'] = tags
            item['img_url'] = img_url
            item['url'] = url
            item['title'] = title
            for key, value in data.items():
                item[key] = value

            yield scrapy.Request(url, callback=self.parse_detail, meta={'item': item})

        if self.page < 30:
            self.page += 1
            yield scrapy.Request(url='http://www.pplock.com/industrial-design/page/' + str(self.page),
                                 callback=self.parse)
Beispiel #9
0
    def parse_category(self, response):
        """Yield one detail request per winner listed on a category page.

        The active sub-navigation link text is used as the tag for every
        entry. Each entry contributes title, award text and designer name;
        the prize time is ``self.year + 1`` as a string. Entries of ``data``
        (defined outside this block) are copied onto the item, which is
        passed to ``parse_detail`` via ``meta['item']``.

        Raises:
            IndexError: if the active nav link, a title, an award, an agency
                text or a link href is missing.
        """
        design_list = response.xpath(
            '//ul[@class="gpWinnersInCategory gp itemList"]//div[@class="in"]')
        tags = response.xpath('//ul[@id="sub-nav"]//a[@class="active"]/text()'
                              ).extract()[0]  # tag
        for design in design_list:
            item = DesignItem()
            title = design.xpath(
                './/h3[@class="projectTitle"]//a/text()').extract()[0]
            prize_level = design.xpath(
                './/p[@class="award"]/text()').extract()[0]
            # Prefer the second agency text node and fall back to the first.
            # The XPath is evaluated once instead of again inside a bare
            # ``except``.
            agency_texts = design.xpath(
                './/p[@class="agency"]/text()').extract()
            if len(agency_texts) > 1:
                designer_name = agency_texts[1].strip()
            else:
                designer_name = agency_texts[0].strip()
            detail_url = design.xpath('.//a[1]/@href').extract()[0]
            item['title'] = title  # title
            item['tags'] = tags
            item['prize_level'] = prize_level  # prize level
            item['designer'] = designer_name  # designer

            item['prize_time'] = str(self.year + 1)  # prize time
            for key, value in data.items():
                item[key] = value
            yield scrapy.Request(url='http://www.effectivedesign.org.uk' +
                                 detail_url,
                                 callback=self.parse_detail,
                                 meta={'item': item})
Beispiel #10
0
 def parse(self, response):
     """Yield items from one JSON result page and schedule the next request.

     Every decoded entry becomes a ``DesignItem`` with title, image URL,
     case URL and the tag of the current category; entries of ``data``
     (defined outside this block) are copied onto it. A non-empty page
     advances ``self.page``; an empty page moves on to the next category
     (while ``self.cate_index`` is below 4) and restarts at page 1.
     """
     content = response.text
     result = json.loads(content)
     for entry in result:
         item = DesignItem()
         item['title'] = entry['title1']
         item['img_url'] = urllib.parse.unquote(
             'http://www.spark-design.cn' + entry['pic1'])
         item['url'] = ('http://www.spark-design.cn/zh-cn/caseshow/?id=' +
                        entry['id'])
         item['tags'] = self.cate_dict[self.cate_list[self.cate_index]]
         for key, value in data.items():
             item[key] = value
         yield item
     # Decide on the decoded value, not the raw text: a body such as "[ ]"
     # or "[]\n" is an empty page too, and comparing against the literal
     # '[]' would keep paginating forever.
     if result:
         self.page += 1
     elif self.cate_index < 4:
         self.cate_index += 1
         self.page = 1
     else:
         return
     yield scrapy.Request(
         'http://www.spark-design.cn/ajax.asp?rnd=.7055475&s=case&page=%s&cid=%s'
         % (self.page, self.cate_list[self.cate_index]),
         callback=self.parse)
Beispiel #11
0
 def parse_detail(self, response):
     """Yield a ``DesignItem`` for one award case page.

     Prize level is ``self.prize_level[self.prize_index]`` and prize time is
     ``self.year``. Image, description (whitespace removed, at most 480
     characters), ``<h2>`` title, designer and company texts are read from
     the page. Entries of ``data`` (defined outside this block) are applied
     afterwards.
     """
     item = DesignItem()
     level = self.prize_level[self.prize_index]
     year = self.year
     page_url = response.url
     site_root = 'http://www.chinagooddesignaward.com'
     picture = response.xpath(
         '//div[@class="main_image"]/ul/li[1]/img/@src').extract()[0]
     if not picture.startswith(site_root):
         picture = site_root + picture
     body_nodes = response.xpath('//div[@class="ct_cn"]//text()').extract()
     description = ''.join(''.join(node.split()) for node in body_nodes)
     heading = response.xpath('//h2/text()').extract()[0]
     designer_nodes = response.xpath(
         '//div[@class="case_text"]/dl[2]/dd/p//text()').extract()
     # Designer fragments lose inner whitespace and are space-separated.
     designer_text = ' '.join(''.join(node.split()) for node in designer_nodes)
     company_text = ' '.join(response.xpath(
         '//div[@class="case_text"]/dl[1]/dd/p[1]//text()').extract())
     fields = (
         ('title', heading),
         ('remark', description[:480]),
         ('url', page_url),
         ('img_url', picture),
         ('designer', designer_text),
         ('company', company_text),
         ('prize_level', level),
         ('prize_time', year),
     )
     for name, content in fields:
         item[name] = content
     for key, value in data.items():
         item[key] = value
     yield item
Beispiel #12
0
    def parse_detail(self, response):
        """Build a ``DesignItem`` from a works page and yield it.

        The first image under ``#works_list`` is made absolute (its first
        two characters are dropped before the site root is prepended). The
        span texts of the second table cell supply title (index 0), tag
        (index 1), designer (index 3) and, when present, company (index 4).
        ``self.year`` is stored as the prize time and entries of ``data``
        (defined outside this block) are copied onto the item last.

        Raises:
            IndexError: if the image or the title/tag/designer texts are
                missing.
        """
        item = DesignItem()
        img_url = response.xpath(
            '//div[@id="works_list"]//a[1]/img/@src').extract()[0]

        if not img_url.startswith('http://www.dgdesign.org.cn'):
            img_url = 'http://www.dgdesign.org.cn' + img_url[2:]
        message = response.xpath(
            '//div[@id="works_list"]//td[2]//div[1]/span/text()').extract()
        remark = ''
        print(img_url)
        item['img_url'] = img_url.strip()
        item['title'] = message[0].strip()
        # The company entry is optional; check the length explicitly instead
        # of hiding every error behind a bare ``except``.
        item['company'] = message[4].strip() if len(message) > 4 else ''
        item['prize_time'] = str(self.year)
        item['remark'] = remark
        item['tags'] = [message[1]]
        item['designer'] = message[3].strip()
        for key, value in data.items():
            item[key] = value
        yield item
Beispiel #13
0
 def parse_list(self, response):
     """Yield one detail request for every ``<h2>`` link on the page.

     Each request carries a fresh ``DesignItem`` in ``meta['item']`` whose
     ``'designer'`` field is the page's first ``<h1>`` text.

     Raises:
         IndexError: if links exist but the page has no ``<h1>`` text.
     """
     detail_list = response.xpath('//h2/a/@href').extract()
     if not detail_list:
         # Nothing to request; as before, the <h1> is not looked up at all.
         return
     # The heading is the same for every link, so read it once instead of
     # re-running the XPath on each iteration.
     designer = response.xpath('//h1/text()').extract()[0]
     for detail_url in detail_list:
         item = DesignItem()
         item['designer'] = designer
         yield scrapy.Request(detail_url,
                              callback=self.parse_detail,
                              meta={'item': item})
Beispiel #14
0
 def parse_detail(self, response):
     """Build a ``DesignItem`` for one works page and yield it.

     Increments ``self.total`` and prints the URL and the running total.
     Title, designer, company, prize level and remark are the first text
     node of the 1st, 2nd, 3rd, 4th and 6th ``div.zuopin_h`` blocks; a
     missing block yields an empty string. The remark is cut to 450
     characters and then has newlines, spaces and carriage returns removed.
     Entries of ``data`` (defined outside this block) are copied onto the
     item last.

     Raises:
         IndexError: if the ``div.only`` image is missing.
     """
     url = response.url
     print(url)
     self.total += 1
     item = DesignItem()
     img_url = response.xpath('//div[@class="only"]/img/@src').extract()[0]
     if not img_url.startswith('http://www.redstaraward.org'):
         img_url = 'http://www.redstaraward.org/' + img_url

     def block_text(position):
         """Return the first text node of the given block, or ``''``."""
         found = response.xpath(
             '//div[@class="zuopin_h"][%d]/div/text()' % position).extract()
         return found[0] if found else ''

     # One helper replaces five copies of the same bare try/except.
     title = block_text(1)
     designer = block_text(2)
     company = block_text(3)
     remark = block_text(6)[:450]
     prize_level = block_text(4)
     item['img_url'] = img_url.strip()
     item['title'] = title.strip()
     item['company'] = company.strip()
     item['prize_time'] = str(self.year)
     item['remark'] = remark.replace('\n', '').replace(' ', '').replace(
         '\r', '').strip()
     item['prize_level'] = prize_level.strip()
     item['designer'] = designer.strip()
     item['url'] = url.strip()
     for key, value in data.items():
         item[key] = value
     print("总数", self.total)
     yield item
Beispiel #15
0
 def parse_detail(self, response):
     """Yield a ``DesignItem`` built from request meta and the page title.

     ``img_url`` and ``tags`` are taken from ``response.meta`` (``None``
     when absent); the title is the first ``h1.title`` text. Entries of
     ``data`` (defined outside this block) are applied afterwards.
     """
     item = DesignItem()
     meta = response.meta
     picture = meta.get('img_url')
     labels = meta.get('tags')
     heading = response.xpath('//h1[@class="title"]/text()').extract()[0]
     fields = {
         'title': heading,
         'img_url': picture,
         'url': response.url,
         'tags': labels,
     }
     for name, content in fields.items():
         item[name] = content
     for key, value in data.items():
         item[key] = value
     yield item
Beispiel #16
0
    def parse(self, response):
        """Yield detail requests for every project, then the next list page.

        Each ``data-project`` attribute is decoded as JSON and supplies the
        project URL and the ``1536x864`` photo. The tag is a group name
        chosen from ``self.category_id`` followed by the category name from
        ``self.category_ids``. After the projects, either the next page of
        the same category is requested (results present and
        ``self.page < 200``) or the first page of the next category.
        """
        projects = response.xpath(
            '//div[@class="grid-row flex flex-wrap"]/div[@data-project]/@data-project'
        ).extract()
        for raw in projects:
            item = DesignItem()
            dic = json.loads(raw)
            url = dic['urls']['web']['project']
            img_url = dic['photo']['1536x864']
            if self.category_id in [
                    '332', '333', '334', '335', '336', '337', '52', '362',
                    '338', '51', '339', '340', '341', '342'
            ]:
                group = 'Technology'
            elif self.category_id in ['25', '259', '27', '260', '28', '261']:
                group = 'Design'
            else:
                group = 'Crafts'
            # All three groups are now comma-separated from the category
            # name; previously only 'Technology' had the separator, giving
            # fused tags such as 'Design<name>'.
            tags = group + ',' + self.category_ids[self.category_id]
            item['img_url'] = img_url.strip()
            item['tags'] = tags
            item['url'] = url
            item['info'] = raw
            yield scrapy.Request(url,
                                 callback=self.parse_detail,
                                 meta={'item': item})

        if projects and self.page < 200:
            self.page += 1
        else:
            # Move on to the next category; stop cleanly once the category
            # list is exhausted instead of raising from the lookup.
            try:
                next_category = self.index_ids[self.index_id + 1]
            except (IndexError, KeyError):
                return
            self.page = 1
            self.index_id += 1
            self.category_id = next_category
        yield scrapy.Request(
            'https://www.kickstarter.com/discover/advanced?category_id=' +
            self.category_id + '&sort=magic&seed=2573000&page=' +
            str(self.page))
Beispiel #17
0
 def parse_detail(self, response):
     """Yield a ``DesignItem`` with tag, image, title and URL of the page.

     Each value is the first match of its XPath; entries of ``data``
     (defined outside this block) are applied afterwards.
     """
     item = DesignItem()
     label = response.xpath('//span[@class="n_r_wz6"]/a/text()').extract()[0]
     picture = response.xpath('//a[@onclick]/img/@src').extract()[0]
     heading = response.xpath(
         '//*[@id="xn_c_prodv_60_nameText"]/text()').extract()[0]
     fields = (
         ('title', heading),
         ('img_url', picture),
         ('url', response.url),
         ('tags', label),
     )
     for name, content in fields:
         item[name] = content
     for key, value in data.items():
         item[key] = value
     yield item
Beispiel #18
0
 def parse_detail(self, response):
     """Yield a ``DesignItem`` holding title, image URL and page URL.

     The image URL gets the site root prepended when it does not start with
     ``http``. Entries of ``data`` (defined outside this block) are applied
     afterwards.
     """
     item = DesignItem()
     heading = response.xpath(
         '//div[@class="c_left"]/div/p[1]/b/text()').extract()[0]
     picture = response.xpath(
         '//div[@class="case_content"]//img/@src').extract()[0]
     if picture.startswith('http'):
         absolute_picture = picture
     else:
         absolute_picture = 'http://www.yu-kangyuan.com' + picture
     fields = {
         'title': heading,
         'img_url': absolute_picture,
         'url': response.url,
     }
     for name, content in fields.items():
         item[name] = content
     for key, value in data.items():
         item[key] = value
     yield item
Beispiel #19
0
    def parse_detail(self, response):
        """Yield a ``DesignItem`` for one page of the current category.

        The tag is ``self.category`` looked up by the current entry of
        ``self.category_list``; image and ``<h1>`` title come from the page.
        Entries of ``data`` (defined outside this block) are applied
        afterwards.
        """
        item = DesignItem()
        category_key = self.category_list[self.category_index]
        label = self.category[category_key]
        picture = response.xpath(
            '//div[@class="limitimg"]/p[1]/img/@src').extract()[0]
        heading = response.xpath('//h1/text()').extract()[0]
        fields = (
            ('title', heading),
            ('img_url', picture),
            ('url', response.url),
            ('tags', label),
        )
        for name, content in fields:
            item[name] = content
        for key, value in data.items():
            item[key] = value
        yield item
Beispiel #20
0
 def parse(self, response):
     """Yield one detail request per case entry in the list.

     Every ``<li>`` contributes a title, a tag string prefixed with
     ``'工业设计,'`` and an absolute URL; the populated ``DesignItem`` is
     handed to ``parse_detail`` through ``meta['item']``.
     """
     entries = response.xpath(
         '//div[@class="d_case_list"]/ul[@class="clearfix"]/li')
     for entry in entries:
         item = DesignItem()
         heading = entry.xpath('.//div[@class="h3"]/text()').extract()[0]
         category = entry.xpath('.//div[@class="p"]/text()').extract()[0]
         href = entry.xpath('./a/@href').extract()[0]
         target = 'http://www.a-fourdesign.com' + href
         fields = (
             ('url', target),
             ('tags', '工业设计,' + category),
             ('title', heading),
         )
         for name, content in fields:
             item[name] = content
         yield scrapy.Request(target,
                              callback=self.parse_detail,
                              meta={'item': item})
Beispiel #21
0
    def parse_detail(self, response):
        """Build a ``DesignItem`` for one award winner page and yield it.

        Tags and prize time are parsed from the ``<h2>`` headline with the
        patterns ``Winner in (.*?)Design Category,`` and
        ``Category, (.*?) -``; `` and `` inside the tags becomes a comma.
        The remark is the text node following the
        "UNIQUE PROPERTIES / PROJECT DESCRIPTION:" marker, with CRLF turned
        into spaces and cut to 480 characters; when the marker is not found
        the remark is an empty string. Entries of ``data`` (defined outside
        this block) are copied onto the item last.

        Raises:
            IndexError: if the headline, image, title or designer node is
                missing, or the headline does not match the patterns.
        """
        item = DesignItem()
        text = response.xpath(
            '/html/body/table[1]/tr[4]/td/span[4]/table/tr[2]/td[2]/h2/text()'
        ).extract()[0]
        tags = re.findall(r'Winner in (.*?)Design Category,', text)[0]
        # A plain replace is a no-op when ' and ' is absent, so no guard.
        tags = tags.replace(' and ', ',')
        prize_time = re.findall(r'Category, (.*?) -', text)[0]
        url = response.url
        img_url = response.xpath(
            '/html/body/table[1]/tr[3]/td/a/img/@src').extract()[0]
        if not img_url.startswith('http'):
            img_url = 'https://competition.adesignaward.com/' + img_url

        # Keep the extracted node list in its own variable. Previously it
        # was assigned to ``remark`` and, when the marker lookup failed, the
        # list itself ended up stored on the item instead of a string.
        remark = ''
        description_nodes = response.xpath(
            '/html/body/table[1]/tr[3]/td/table/tr[3]/td[1]/text()'
        ).extract()
        try:
            marker = description_nodes.index(
                " \r\nUNIQUE PROPERTIES / PROJECT DESCRIPTION:")
            remark = ' '.join(description_nodes[marker + 1].split('\r\n'))
        except (ValueError, IndexError):
            # Marker absent, or nothing follows it: log and keep ''.
            print("*" * 100, description_nodes, response.url)

        title = response.xpath(
            '/html/body/table[1]/tr[2]/td/table/tr/td[1]/h1/text()').extract(
            )[0][:-4]
        designer = response.xpath(
            '/html/body/table[1]/tr[2]/td/table/tr/td[1]/h1/a/text()').extract(
            )[0].strip()
        remark = remark[:480]
        item['title'] = title
        item['remark'] = remark
        item['url'] = url
        item['img_url'] = img_url
        item['designer'] = designer
        item['tags'] = tags
        item['prize_time'] = prize_time
        for key, value in data.items():
            item[key] = value
        yield item
Beispiel #22
0
 def parse_detail(self, response):
     """Yield a ``DesignItem`` with title, first gallery image and tags.

     The tag string is ``'工业设计,'`` followed by the comma-joined link
     texts of the article tag box. Entries of ``data`` (defined outside this
     block) are applied, then the finished item is printed.
     """
     item = DesignItem()
     heading = response.xpath('//h1/text()').extract()[0]
     picture = response.xpath(
         '//ul[@id="picInGG"]/li[1]/img/@src').extract()[0]
     tag_links = response.xpath(
         '//div[@class="fleft article_tags"]/a/text()').extract()
     fields = (
         ('tags', '工业设计,' + ','.join(tag_links)),
         ('img_url', picture),
         ('url', response.url),
         ('title', heading),
     )
     for name, content in fields:
         item[name] = content
     for key, value in data.items():
         item[key] = value
     print(item)
     yield item
Beispiel #23
0
 def parse_detail(self, response):
     """Yield a ``DesignItem`` for one page of the current category.

     The first ``#focus`` image is prefixed with the site root unless it
     already starts with it; the tag comes from ``self.category``; the title
     is the stripped ``div.pt10`` text. The item is printed before entries
     of ``data`` (defined outside this block) are applied.
     """
     item = DesignItem()
     site_root = 'http://www.yxidea.com.cn'
     picture = response.xpath(
         '//div[@id="focus"]/ul/li/img/@src').extract()[0]
     if not picture.startswith(site_root):
         picture = site_root + '/' + picture
     category_key = self.category_list[self.category_index]
     label = self.category[category_key]
     heading = response.xpath('//div[@class="pt10"]/text()').extract()[0]
     fields = (
         ('tags', label),
         ('title', heading.strip()),
         ('url', response.url),
         ('img_url', picture),
     )
     for name, content in fields:
         item[name] = content
     print(item)
     for key, value in data.items():
         item[key] = value
     yield item
Beispiel #24
0
 def parse_detail(self, response):
     """Yield a ``DesignItem`` for one page of the current category.

     A non-``http`` image path loses its first character before the site
     root is prepended; the title drops the first three characters of the
     header span text. Entries of ``data`` (defined outside this block) are
     applied afterwards.
     """
     item = DesignItem()
     category_key = self.category_list[self.category_index]
     label = self.category[category_key]
     picture = response.xpath('//div[@class="ci_p"]//img/@src').extract()[0]
     if not picture.startswith('http'):
         picture = 'http://www.ctdesign.cn' + picture[1:]
     header_text = response.xpath(
         '//div[@class="ci_head clearfix"]/span/text()').extract()[0]
     fields = {
         'title': header_text[3:],
         'img_url': picture,
         'url': response.url,
         'tags': label,
     }
     for name, content in fields.items():
         item[name] = content
     for key, value in data.items():
         item[key] = value
     yield item
Beispiel #25
0
    def parse_detail(self, response):
        """Yield a ``DesignItem`` for one case page.

        The current category name is used as the tag unless it equals
        ``'其他案例'``, in which case the tag is empty. The title comes from
        ``response.meta`` and the image from the page. Entries of ``data``
        (defined outside this block) are applied afterwards.
        """
        item = DesignItem()
        category_name = self.category[self.category_list[self.category_index]]
        label = '' if category_name == '其他案例' else category_name
        picture = response.xpath(
            '//div[@class="nr"]/table/tbody/tr[2]/td/img/@src').extract()[0]
        if not picture.startswith('http'):
            picture = 'http://www.ico-id.com' + picture
        fields = (
            ('title', response.meta.get('title')),
            ('img_url', picture),
            ('url', response.url),
            ('tags', label),
        )
        for name, content in fields:
            item[name] = content
        for key, value in data.items():
            item[key] = value
        yield item
Beispiel #26
0
    def parse_detail(self, response):
        """Yield a ``DesignItem`` for one case page.

        The current category name is used as the tag unless it equals
        ``'其他行业'``, in which case the tag is empty. Image and title are
        read from the page. Entries of ``data`` (defined outside this block)
        are applied afterwards.
        """
        item = DesignItem()
        category_name = self.category[self.category_list[self.category_index]]
        label = '' if category_name == '其他行业' else category_name
        picture = response.xpath('//ul[@class="pic"]/li/img/@src').extract()[0]
        if not picture.startswith('http'):
            picture = 'http://www.bfitdesign.com/' + picture
        heading = response.xpath('//*[@class="p2"]/text()').extract()[0]
        fields = (
            ('title', heading),
            ('img_url', picture),
            ('url', response.url),
            ('tags', label),
        )
        for name, content in fields:
            item[name] = content
        for key, value in data.items():
            item[key] = value
        yield item
Beispiel #27
0
    def parse_detail(self, response):
        """Yield a ``DesignItem`` built from the ``div.plc`` link texts.

        The third link text becomes the tag and the fourth the title; the
        zoom image URL is prefixed with the site root when it does not start
        with ``http``. Entries of ``data`` (defined outside this block) are
        applied afterwards.
        """
        item = DesignItem()
        link_texts = response.xpath('//div[@class="plc"]/a/text()').extract()
        label, heading = link_texts[2], link_texts[3]
        picture = response.xpath('//a[@class="jqzoom"]/img/@src').extract()[0]
        if not picture.startswith('http'):
            picture = 'http://www.hx-design.com' + picture
        fields = {
            'title': heading,
            'img_url': picture,
            'url': response.url,
            'tags': label,
        }
        for name, content in fields.items():
            item[name] = content
        for key, value in data.items():
            item[key] = value
        yield item
Beispiel #28
0
    def parse_detail(self, response):
        """Yield a ``DesignItem`` for one detail page.

        Tags are taken from ``response.meta['tags']``; the image is the first
        one inside the detail paragraphs and the title is the text of the
        last child of ``div.guide``. Entries of ``data`` (defined outside
        this block) are applied afterwards.
        """
        item = DesignItem()
        labels = response.meta['tags']
        picture = response.xpath(
            '//div[@class="detail detail_p"]/p//img/@src').extract()[0]
        if not picture.startswith('http'):
            picture = 'http://www.newplan.com.cn' + picture
        heading = response.xpath(
            '//div[@class="guide"]/*[last()]/text()').extract()[0]
        fields = (
            ('title', heading),
            ('img_url', picture),
            ('url', response.url),
            ('tags', labels),
        )
        for name, content in fields:
            item[name] = content
        for key, value in data.items():
            item[key] = value
        yield item
Beispiel #29
0
    def parse_detail(self, response):
        """Yield a ``DesignItem`` for one slider page.

        The first slider image is made absolute when needed, the title comes
        from ``response.meta['title']`` and the remark is the comma-joined
        paragraph texts with all whitespace removed. Entries of ``data``
        (defined outside this block) are applied afterwards.
        """
        item = DesignItem()
        picture = response.xpath(
            '//ul[@class="bxslider"]/li[1]/img/@src').extract()[0]
        if not picture.startswith('http'):
            picture = 'http://www.siwei-id.com' + picture
        heading = response.meta['title']
        paragraphs = response.xpath('//div[@class="w-text"]/p/text()').extract()
        description = ','.join(''.join(text.split()) for text in paragraphs)
        fields = (
            ('title', heading.strip()),
            ('url', response.url),
            ('remark', description),
            ('img_url', picture),
        )
        for name, content in fields:
            item[name] = content
        for key, value in data.items():
            item[key] = value
        yield item
Beispiel #30
0
 def parse_detail(self, response):
     """Yield a ``DesignItem`` for one page.

     The image URL is prefixed with the site root when it does not start
     with ``http``; the remark is every ``page_text`` text node with inner
     whitespace removed, joined by single spaces; the title comes from
     ``response.meta['title']``. Entries of ``data`` (defined outside this
     block) are applied afterwards.
     """
     item = DesignItem()
     picture = response.xpath(
         '//div[@class="col-md-12 col-sm-12 col-xs-12"]/img/@src').extract()[0]
     if not picture.startswith('http'):
         picture = 'http://www.kcandesign.com/' + picture
     text_nodes = response.xpath(
         '//div[contains(@class,"page_text")]//text()').extract()
     description = ' '.join(''.join(node.split()) for node in text_nodes)
     heading = response.meta['title']
     fields = {
         'title': heading,
         'img_url': picture,
         'url': response.url,
         'remark': description,
     }
     for name, content in fields.items():
         item[name] = content
     for key, value in data.items():
         item[key] = value
     yield item