Example #1
0
 def get_data(self, resp):
     """Parse a www.dachenglaw.com listing page and yield one record per row.

     Every ``//tbody/tr`` row of the decoded HTML produces a dict with the
     keys ``name``, ``detail_url``, ``email``, ``position`` and
     ``location``. ``detail_url`` is the row's link target prefixed with the
     site root, e.g.
     ``http://www.dachenglaw.com/cn/professionals/qi.ao.html``.

     Args:
         resp: Response-like object whose ``content`` attribute holds the
             page bytes. A falsy value ends the generator immediately.
             NOTE(review): if this is a ``requests.Response``, error
             statuses are falsy as well -- confirm against the caller.

     Yields:
         dict: The extracted fields of one table row.
     """
     if not resp:
         return
     html = etree.HTML(resp.content.decode())
     for tr in html.xpath('//tbody/tr'):
         name = tr.xpath('./td[1]/a/text()')
         # ``extra_first`` is defined elsewhere; it has to return a str here,
         # otherwise the concatenation below raises.
         # NOTE(review): presumably it picks the first XPath match -- confirm.
         detail_url = 'http://www.dachenglaw.com' + extra_first(
             tr.xpath('./td[1]/a/@href'))
         email = tr.xpath('./td[2]/text()')
         position = tr.xpath('./td[3]/text()')
         location = tr.xpath('./td[4]/text()')
         yield {
             "name": extra_first(name),
             "detail_url": detail_url,
             "email": extra_first(email),
             "position": extra_first(position),
             # split() + join() removes every whitespace run from the text.
             "location": ''.join(extra_first(location).split()),
         }
Example #2
0
 def get_data(self, resp):
     """Yield one record per ``<li>`` of the ``pc_temp_songlist`` list.

     The response body is decoded and parsed as HTML; each matching list
     item becomes a dict with the keys ``id``, ``title``, ``detail_url``
     and ``time``. Each value is whatever ``extra_first`` (defined
     elsewhere) returns for the corresponding XPath result.

     A falsy ``resp`` ends the generator without yielding anything.
     """
     if not resp:
         return
     # Output key -> XPath evaluated relative to each list item.
     field_paths = (
         ("id", "./span[3]/text()"),
         ("title", "./@title"),
         ("detail_url", "./a/@href"),
         ("time", "./span[4]/span/text()"),
     )
     document = etree.HTML(resp.content.decode())
     # The trailing space inside the class value is part of the selector.
     for entry in document.xpath('//div[@class="pc_temp_songlist "]/ul/li'):
         yield {
             key: extra_first(entry.xpath(path))
             for key, path in field_paths
         }
Example #3
0
    def get_data(self, resp):
        """Yield one record per ``//div/table`` element, skipping the first.

        Each record carries the keys ``title``, ``detail_url``, ``info``,
        ``score`` and ``desc``, all read from the second cell of the
        table's rows and reduced with ``extra_first`` (defined elsewhere).

        A falsy ``resp`` ends the generator without yielding anything.
        """
        if not resp:
            return
        page = etree.HTML(resp.content.decode())
        # The first matched table is deliberately left out.
        # NOTE(review): presumably it is not a result entry -- confirm.
        for block in page.xpath('//div/table')[1:]:
            record = {}
            record["title"] = extra_first(
                block.xpath("./tr/td[2]/div/a/text()"))
            record["detail_url"] = extra_first(
                block.xpath("./tr/td[2]/div/a/@href"))
            record["info"] = extra_first(
                block.xpath("./tr/td[2]/p/text()"))
            record["score"] = extra_first(
                block.xpath("./tr/td[2]/div[2]/span[2]/text()"))
            record["desc"] = extra_first(
                block.xpath('./tr/td[2]/p[2]/text()'))
            yield record
Example #4
0
    def get_data(self, resp):
        """Parse a maoyan.com listing page and yield one record per entry.

        Every ``//dl/dd`` node of the decoded HTML produces a dict with the
        keys ``id``, ``detail_url``, ``title``, ``actors``, ``releasetime``
        and ``score``.

        Args:
            resp: Response-like object whose ``content`` attribute holds
                the page bytes. A falsy value ends the generator
                immediately.

        Yields:
            dict: The extracted fields of one ``<dd>`` entry.
        """
        if not resp:
            return
        html = etree.HTML(resp.content.decode())
        for dd in html.xpath('//dl/dd'):
            entry_id = dd.xpath('./i/text()')
            title = dd.xpath('./a/@title')
            detail_url = dd.xpath('./a/@href')
            actors = dd.xpath('./div/div/div/p[2]/text()')
            releasetime = dd.xpath('./div/div/div/p[3]/text()')
            # The score is spread over two <i> elements and glued together.
            # NOTE(review): presumably integer and fractional part -- confirm.
            score_head = dd.xpath('./div/div/div[2]/p/i[1]/text()')
            score_tail = dd.xpath('./div/div/div[2]/p/i[2]/text()')

            # ``extra_first`` is defined elsewhere; the concatenations below
            # need it to return str values.
            yield {
                "id": extra_first(entry_id),
                "detail_url": 'https://maoyan.com' + extra_first(detail_url),
                "title": extra_first(title),
                "actors": extra_first(actors),
                "releasetime": extra_first(releasetime),
                "score": extra_first(score_head) + extra_first(score_tail),
            }
Example #5
0
 def get_data(self, resp):
     """Yield one record per ``//ol/li`` item of the response's HTML.

     Each record has the keys ``id``, ``title``, ``detail_url``,
     ``content``, ``score`` and ``info``; every value is the result of
     ``extra_first`` (defined elsewhere) applied to the matching XPath
     result.

     A falsy ``resp`` ends the generator without yielding anything.
     """
     if not resp:
         return
     # Output key -> XPath evaluated relative to each <li>; the insertion
     # order fixes the key order of the yielded dicts.
     selectors = {
         "id": "./div/div/em/text()",
         "title": "./div/div/a/img/@alt",
         "detail_url": "./div/div/a/@href",
         "content": "./div/div[2]/div[2]/p/text()",
         "score": "./div/div[2]/div[2]/div/span[2]/text()",
         "info": './div/div[2]/div[2]/p/span/text()',
     }
     tree = etree.HTML(resp.content.decode())
     for node in tree.xpath('//ol/li'):
         yield {
             field: extra_first(node.xpath(expr))
             for field, expr in selectors.items()
         }