def parse(self, response):
    """Build one item per ranked story in every ``top-min`` panel.

    For each ``<div class="top-min">`` panel the heading text, the story
    titles, the rank labels and the two halves of the score are read as
    parallel lists and combined row by row.

    Args:
        response: The page to scrape; only its ``xpath`` method is used.

    Returns:
        A list of ``Meiju100Item`` objects with the ``catgory``,
        ``storyname``, ``rank`` and ``point`` fields set. Rows are matched
        by position; if the extracted lists differ in length, the surplus
        entries of the longer lists are dropped.
    """
    row_path = './/ul[@class ="top-list fn-clear"]/li'
    score_path = row_path + '/div[@class="lasted-time fn-right"]'

    items = []
    for panel in response.xpath('//div[@class ="top-min"]'):
        # extract_first() gives None instead of raising IndexError when the
        # panel has no heading, so one odd panel cannot abort the page.
        category = panel.xpath(
            './/div[@class="tt fn-clear"]/h5/text()').extract_first()
        names = panel.xpath(row_path + '/h5/a/text()').extract()
        ranks = panel.xpath(
            row_path + '/div[@class="lasted-num fn-left"]/i/text()').extract()
        whole_parts = panel.xpath(score_path + '/strong/text()').extract()
        fraction_parts = panel.xpath(score_path + '/span/text()').extract()

        # Pair the n-th <strong> text with the n-th <span> text. A nested
        # "for a ... for b ..." comprehension would build the cartesian
        # product, and indexing it by row would reuse the first row's
        # <strong> value for every story.
        # NOTE(review): assumes the <span> text parses as the fractional
        # part of the score (e.g. ".5") - confirm against the live markup.
        points = [
            float(whole) + float(fraction)
            for whole, fraction in zip(whole_parts, fraction_parts)
        ]

        for name, rank, point in zip(names, ranks, points):
            item = Meiju100Item()
            item['catgory'] = category
            item["storyname"] = name
            item['rank'] = rank
            item['point'] = point
            items.append(item)
    return items
def parse(self, response):
    """Yield one ``Meiju100Item`` per ``<li>`` of the ``top-list`` list.

    Each item carries ``movname``, ``movurl`` (made absolute with
    ``response.urljoin``), ``movstatu``, ``movstation`` and ``movupdate``.
    When a primary selector yields nothing, a fallback is used:

    * ``movstatu``   - text of ``span[1]`` itself instead of its ``font``.
    * ``movstation`` - the placeholder list ``[u'未知']`` ("unknown").
    * ``movupdate``  - text of the ``font`` inside ``div[2]``.

    Args:
        response: The page to scrape; ``xpath`` and ``urljoin`` are used.

    Yields:
        The populated item for each list row, in document order.
    """
    for row in response.xpath('//ul[@class="top-list fn-clear"]/li'):
        entry = Meiju100Item()

        # Primary selectors; extract_first() returns None when nothing matches.
        entry['movname'] = row.xpath('./h5/a/text()').extract_first()
        link = row.xpath('./h5/a/@href').extract_first()
        entry['movurl'] = response.urljoin(link)
        entry['movstatu'] = row.xpath('./span[1]/font/text()').extract_first()
        entry['movstation'] = row.xpath('./span[2]/text()').extract_first()
        entry['movupdate'] = row.xpath('./div[2]/text()').extract_first()

        # Fallbacks for values that came back empty or missing.
        if not entry['movstatu']:
            entry['movstatu'] = row.xpath('./span[1]/text()').extract_first()
        if not entry['movstation']:
            entry['movstation'] = [u'未知']
        if not entry['movupdate']:
            entry['movupdate'] = row.xpath(
                './div[2]/font/text()').extract_first()

        yield entry
def parse(self, response):
    """Collect one ``Meiju100Item`` per ``<li>`` of the ``top-list`` list.

    Every field is stored as the full list returned by ``extract()``, so a
    selector that matches nothing leaves an empty list in the item.

    Args:
        response: The page to scrape; only its ``xpath`` method is used.

    Returns:
        A list of items with ``storyName``, ``storyState``, ``tvStation``
        and ``updateTime`` set, in document order.
    """
    # Item field -> XPath relative to the row, in assignment order.
    field_queries = (
        ('storyName', './h5/a/text()'),
        ('storyState', './span[1]/font/text()'),
        ('tvStation', './span[2]/text()'),
        ('updateTime', './div[2]/text()'),
    )

    collected = []
    for row in response.xpath('//ul[@class="top-list fn-clear"]/li'):
        entry = Meiju100Item()
        for field, query in field_queries:
            entry[field] = row.xpath(query).extract()
        collected.append(entry)
    return collected
def parse(self, response):
    """Collect the title, TV station and genre span of each list row.

    Args:
        response: The page to scrape; only its ``xpath`` method is used.

    Returns:
        A list of ``Meiju100Item`` objects with ``storyName``,
        ``tvStation`` and ``storyLX`` set. A field whose selector matches
        nothing is set to ``None`` rather than aborting the whole page.
    """
    items = []
    for row in response.xpath('//ul[@class="top-list fn-clear"]/li'):
        item = Meiju100Item()
        # extract_first() returns None for a missing node, whereas
        # extract()[0] raises IndexError and discards every item already
        # collected for this response.
        item['storyName'] = row.xpath('./h5/a/text()').extract_first()
        item['tvStation'] = row.xpath(
            './span[@class="mjtv"]/text()').extract_first()
        item['storyLX'] = row.xpath(
            './span[@class="mjjq"]/text()').extract_first()
        # NOTE: storyState and updateTime are not populated here; their
        # extraction was commented out in the earlier version, using
        #   ./span[@class="state1 new100state1"]/text()
        #   ./div[@class="lasted-time new100time fn-right"]/font/text()
        # Separator written to stdout once per row.
        print("----------------------------------")
        items.append(item)
    return items
def parse(self, response):
    """Collect one item per ``lasted-num fn-left`` rank cell.

    Rows are located through their rank ``<div>``; every other field is
    read from siblings via the parent (``..``) of that cell.

    Args:
        response: The page to scrape; only its ``xpath`` method is used.

    Returns:
        A list of ``Meiju100Item`` objects. ``storyName``, ``storyState``
        and ``updateTime`` hold the first matching text or ``None`` when
        nothing matches; ``tvStation`` holds the list of all matching
        link texts (possibly empty).
    """
    items = []
    for rank_cell in response.xpath('//li/div[@class="lasted-num fn-left"]'):
        item = Meiju100Item()
        # extract_first() returns None for a missing node, whereas
        # extract()[0] raises IndexError and discards every item already
        # collected for this response.
        item['storyName'] = rank_cell.xpath(
            '../h5/a/text()').extract_first()
        item['storyState'] = rank_cell.xpath(
            '../span[@class="state1 new100state1"]/text()').extract_first()
        # Kept as a list: all link texts inside the station span.
        item['tvStation'] = rank_cell.xpath(
            '../span[@class="mjtv"]/a/text()').extract()
        item['updateTime'] = rank_cell.xpath(
            '../div[@class="lasted-time new100time fn-right"]/text()'
        ).extract_first()
        items.append(item)
    return items