def shuichanpin_parse(self, response):
    """Parse one JSON page of "水产品" price records and request the next page.

    Each entry of the response's ``"list"`` array is turned into a
    ``FarmItem``. An item is yielded only when its ``ReleaseTime`` (parsed
    with the ``%Y-%m-%d`` format and converted by ``time.mktime``) is
    greater than ``self.today - self.crawl_day``.

    After the page is processed ``self.shuichanpin_current_num`` is
    incremented and, unless it then equals ``self.max_crawl_num``, a form
    request for that page number is yielded with this method as callback.

    Args:
        response: Response whose body is a JSON document with a ``"list"``
            array of records carrying the keys ``PSort``, ``PName``,
            ``LPrice``, ``PPrice``, ``MPrice`` and ``ReleaseTime``.

    Yields:
        ``FarmItem`` objects that pass the date filter, followed by at most
        one ``scrapy.FormRequest`` for the next page.
    """
    entered_on = time.strftime('%Y-%m-%d', time.localtime())
    payload = json.loads(response.body)
    # Loop-invariant threshold for the date filter.
    # NOTE(review): presumably both attributes are epoch seconds -- confirm.
    cutoff = self.today - self.crawl_day

    # Iterate the records that are actually present rather than a fixed
    # index range: a page with fewer records than usual would otherwise
    # raise IndexError and stop the pagination below from being scheduled.
    for record in payload["list"]:
        farm_item = FarmItem()
        farm_item['province'] = "山东"
        farm_item['market'] = "青岛市城阳蔬菜水产品批发市场"
        farm_item['typy'] = record["PSort"]
        farm_item['name'] = record["PName"]
        farm_item['standard'] = "none"
        farm_item['area'] = "华东"
        farm_item['color'] = "none"
        farm_item['unit'] = "元/公斤"
        farm_item['minPrice'] = record["LPrice"]
        farm_item['avgPrice'] = record["PPrice"]
        farm_item['maxPrice'] = record["MPrice"]
        farm_item['entertime'] = entered_on
        farm_item['time'] = record["ReleaseTime"]

        released_at = time.mktime(
            time.strptime(record["ReleaseTime"], "%Y-%m-%d"))
        if released_at > cutoff:
            yield farm_item

    self.shuichanpin_current_num += 1
    if self.shuichanpin_current_num != self.max_crawl_num:
        yield scrapy.FormRequest(
            url='http://www.cncyms.cn/pages.php',
            formdata={
                "pageNum": str(self.shuichanpin_current_num),
                "pname": "",
                "reltime": "水产品",
            },
            callback=self.shuichanpin_parse)
Exemple #2
0
    def parse(self, response):
        """Parse one JSON page of "副食品" price records and request the next page.

        Each entry of the response's ``"list"`` array is turned into a
        ``FarmItem`` and yielded. Afterwards ``self.current_num`` is
        incremented, a progress line is printed, and a form request for
        that page number is yielded unless the counter equals 2942.

        Args:
            response: Response whose body is a JSON document with a
                ``"list"`` array of records carrying the keys ``PSort``,
                ``PName``, ``LPrice``, ``PPrice``, ``MPrice`` and
                ``ReleaseTime``.

        Yields:
            One ``FarmItem`` per record, followed by at most one
            ``scrapy.FormRequest`` for the next page.
        """
        entered_on = time.strftime('%Y-%m-%d', time.localtime())
        payload = json.loads(response.body)

        # Iterate the records that are actually present rather than a
        # fixed index range: a page with fewer records than usual would
        # otherwise raise IndexError and stop the crawl from advancing.
        for record in payload["list"]:
            farm_item = FarmItem()
            farm_item['province'] = "山东"
            farm_item['market'] = "青岛市城阳蔬菜水产品批发市场"
            farm_item['typy'] = record["PSort"]
            farm_item['name'] = record["PName"]
            farm_item['standard'] = "none"
            farm_item['area'] = "华东"
            farm_item['color'] = "none"
            farm_item['unit'] = "元/公斤"
            farm_item['minPrice'] = record["LPrice"]
            farm_item['avgPrice'] = record["PPrice"]
            farm_item['maxPrice'] = record["MPrice"]
            farm_item['entertime'] = entered_on
            farm_item['time'] = record["ReleaseTime"]
            yield farm_item

        self.current_num += 1
        print("=====================crawl:" + str(self.current_num))
        # NOTE(review): 2942 is a hard-coded stop page -- presumably the
        # page count when this was written; confirm it is still correct.
        if self.current_num != 2942:
            yield scrapy.FormRequest(
                url='http://www.cncyms.cn/pages.php',
                formdata={
                    "pageNum": str(self.current_num),
                    "pname": "",
                    "reltime": "副食品",
                },
                callback=self.parse,
            )
    def pricePage_parse(self, response):
        """Parse a price-table page and follow every pager link.

        Every ``<tr>`` of the table with class ``f_s_14`` is read as one
        price record: column 1 is stored as the record's time, columns 2,
        3 and 4 as min, max and average price with their first character
        dropped, and the link text in column 5 (sliced ``[11:-4]``) as the
        product name. Rows in which any of the sliced cells is missing are
        skipped, so one incomplete row no longer raises ``TypeError`` and
        ends the whole callback.

        After the rows, a request is yielded for each link inside the
        ``pager`` div that has an ``href``, with this method as callback.

        Args:
            response: The response for a price-listing page.

        Yields:
            ``FarmItem`` objects for complete rows, then ``scrapy.Request``
            objects for the pager links.
        """
        print("------价格页面解析函数------")
        for row in response.xpath("//table[@class='f_s_14']/tr"):
            name_text = row.xpath("./td[5]/a/text()").extract_first()
            low_text = row.xpath("./td[2]/text()").extract_first()
            avg_text = row.xpath("./td[4]/text()").extract_first()
            high_text = row.xpath("./td[3]/text()").extract_first()
            # extract_first() returns None when the node is absent; slicing
            # None would raise, so rows lacking a cell are skipped instead.
            cells = (name_text, low_text, avg_text, high_text)
            if any(cell is None for cell in cells):
                continue

            farm_item = FarmItem()
            farm_item['province'] = "黑龙江"
            farm_item['market'] = "牡丹江地利农副产品有限公司"
            farm_item['typy'] = "蔬菜"
            # NOTE(review): the [11:-4] slice presumably strips fixed
            # boilerplate around the product name -- confirm on the page.
            farm_item['name'] = name_text[11:-4]
            farm_item['standard'] = "none"
            farm_item['area'] = "东北"
            farm_item['color'] = "none"
            farm_item['unit'] = "元/斤"
            # NOTE(review): [1:] presumably drops a leading currency
            # symbol from each price cell -- confirm on the page.
            farm_item['minPrice'] = low_text[1:]
            farm_item['avgPrice'] = avg_text[1:]
            farm_item['maxPrice'] = high_text[1:]
            farm_item['entertime'] = self.now
            farm_item['time'] = row.xpath("./td[1]/text()").extract_first()
            yield farm_item

        print("正在补充当前品种的所有url")
        for page in response.xpath("//div[@id='pager']/a"):
            href = page.xpath("./@href").extract_first()
            if href is None:
                # An anchor without href cannot be turned into a request.
                continue
            yield scrapy.Request("http://www.vipveg.com" + href,
                                 callback=self.pricePage_parse)
 def parse(self, response):
     """Parse the index page and request each listed region's page.

     For every link matched under the ``borderTop p_5`` cell, the link
     text is used as the province and passed to ``self.getArea`` to get
     its area. A ``FarmItem`` pre-filled with the province, the area and
     the constant fields is attached as ``meta['item']`` to a request for
     the link's ``href``, handled by ``self.provinceIndexParse``. Links
     whose area is "异常" produce no request.
     """
     print("----------正在解析首页-----------")
     region_links = response.xpath(
         "//td[@class='borderTop p_5']/table/tr/td/a")
     for link in region_links:
         province_name = link.xpath("./text()").extract_first()
         region = self.getArea(province_name)

         # Fields shared by every record that is later scraped for this
         # province; the remaining fields are filled in by the callback.
         prefilled = FarmItem()
         prefilled['province'] = province_name
         prefilled['area'] = region
         prefilled['typy'] = "蔬菜"
         prefilled['standard'] = "none"
         prefilled['color'] = "none"
         prefilled['unit'] = "元/斤"
         prefilled['entertime'] = self.now

         if region == "异常":
             # getArea reported this entry as abnormal; do not follow it.
             continue

         target_url = ("http://www.vipveg.com" +
                       link.xpath("./@href").extract_first())
         yield scrapy.Request(target_url,
                              meta={'item': prefilled},
                              callback=self.provinceIndexParse)