Esempio n. 1
0
# -*- coding: utf-8 -*-
from datetime import datetime, timedelta
import scrapy
from agriculture_demo.items import CfvinItem
from agriculture_demo.dbhelper import DBHelper

# Crawl window for the strawberry market news data:
#   1. If the database holds no rows yet, crawl the last 30 days.
#   2. Otherwise crawl from the newest date stored in the database up to now.
get_today = datetime.now()
dbhelper = DBHelper()
date = dbhelper.get_latest_date("strawberry_market_news")
if date is None:
    # Nothing stored yet: fall back to a 30-day look-back.
    starttime = datetime.today() - timedelta(days=30)
else:
    starttime = date


class SpiderCfvinSpider(scrapy.Spider):
    """Spider that starts from the ``/caomei/`` list page on cfvin.com.

    ``parse`` walks the list page and schedules one request per linked entry;
    each of those responses is handed to ``self.parse_second`` (not shown in
    this excerpt).
    """

    name = 'spider_cfvin'
    allowed_domains = ['cfvin.com']
    start_urls = ['http://www.cfvin.com/caomei/']

    def parse(self, response):
        """Yield a follow-up request for every linked entry of the list page.

        Args:
            response: The downloaded list page.

        Yields:
            scrapy.Request: One request per ``<li>`` below ``ul#list`` that
            has a direct ``<a href=...>`` child, with ``self.parse_second``
            as its callback.
        """
        for entry in response.xpath("//ul[@id='list']//li"):
            link = entry.xpath("./a/@href").extract_first()
            if not link:
                # extract_first() returns None when the <li> has no direct
                # <a href>; building a Request from that would raise and
                # abort the remaining entries.
                continue
            # urljoin leaves absolute URLs unchanged and resolves relative
            # hrefs against the page URL, so Request always gets a full URL.
            yield scrapy.Request(response.urljoin(link),
                                 callback=self.parse_second)
Esempio n. 2
0
 def process_item(self, item, spider):
     """Hand ``AppleAgronetItem`` instances to the database helper.

     Items of any other type are returned without touching the database.
     The received item is returned in both cases.

     NOTE(review): the arguments after ``item`` look like a table name
     followed by the columns used for the duplicate check -- confirm
     against ``DBHelper.vertify_and_insert``.
     """
     if not isinstance(item, AppleAgronetItem):
         return item
     helper = DBHelper()
     helper.vertify_and_insert(
         item, "apple_price", "date", "variety", "terminal_market")
     return item
Esempio n. 3
0
 def process_item(self, item, spider):
     """Forward ``Weather`` items to ``DBHelper.vertify_and_update``.

     Any other item type skips the database call. The item itself is
     returned either way.

     NOTE(review): ``"weather"`` is presumably the target table and
     ``"name"`` the lookup column -- confirm against ``DBHelper``.
     """
     if not isinstance(item, Weather):
         return item
     DBHelper().vertify_and_update(item, "weather", "name")
     return item
Esempio n. 4
0
 def process_item(self, item, spider):
     """Pass ``CfvinItem`` instances to ``DBHelper.vertify_and_insert``.

     Other item types are returned without any database call; the item is
     returned unchanged by this method in every case.

     NOTE(review): ``"strawberry_market_news"`` looks like the table name
     and ``"date"``/``"title"`` like the duplicate-check columns -- confirm
     against ``DBHelper``.
     """
     wanted = isinstance(item, CfvinItem)
     if wanted:
         storage = DBHelper()
         storage.vertify_and_insert(
             item, "strawberry_market_news", "date", "title")
     return item
Esempio n. 5
0
 def process_item(self, item, spider):
     """Send ``AppleZhengzhouItem`` instances to the database helper.

     Anything that is not an ``AppleZhengzhouItem`` is returned as-is
     without a database call. The item is the return value in all cases.

     NOTE(review): the string arguments are presumably the table name and
     the columns used to detect existing rows -- confirm against
     ``DBHelper.vertify_and_insert``.
     """
     if not isinstance(item, AppleZhengzhouItem):
         return item
     DBHelper().vertify_and_insert(
         item, "apple_zhengzhou", "date", "variety_month")
     return item
Esempio n. 6
0
 def process_item(self, item, spider):
     """Hand ``NatescItem`` instances to ``DBHelper.vertify_and_insert``.

     Items of other types bypass the database call. The same item object
     is returned regardless of its type.

     NOTE(review): ``"pest_news"`` is presumably the table and
     ``"date"``/``"title"`` the duplicate-check columns -- confirm against
     ``DBHelper``.
     """
     if not isinstance(item, NatescItem):
         return item
     store = DBHelper()
     store.vertify_and_insert(item, "pest_news", "date", "title")
     return item
Esempio n. 7
0
 def process_item(self, item, spider):
     """Forward ``Precipitation`` items to the database helper.

     Non-``Precipitation`` items are returned without a database call;
     the item is returned in both branches.

     NOTE(review): ``"precipitation"`` looks like the table name and
     ``"date"`` like the duplicate-check column -- confirm against
     ``DBHelper.vertify_and_insert``.
     """
     is_precipitation = isinstance(item, Precipitation)
     if is_precipitation:
         DBHelper().vertify_and_insert(item, "precipitation", "date")
     return item
Esempio n. 8
0
 def process_item(self, item, spider):
     """Pass ``NmcItem`` instances to ``DBHelper.vertify_and_insert``.

     Every other item type skips the database call. The incoming item is
     always what this method returns.

     NOTE(review): ``"forecast_and_assessment"`` is presumably the table
     and ``"date"``/``"title"`` the duplicate-check columns -- confirm
     against ``DBHelper``.
     """
     if not isinstance(item, NmcItem):
         return item
     writer = DBHelper()
     writer.vertify_and_insert(
         item, "forecast_and_assessment", "date", "title")
     return item
Esempio n. 9
0
 def process_item(self, item, spider):
     """Send ``MofcomItem`` instances to the database helper.

     Items that are not ``MofcomItem`` are returned without a database
     call. The item is returned whether or not the helper was invoked.

     NOTE(review): ``"strawberry_price"`` looks like the table name and
     ``"date"``/``"market"`` like the duplicate-check columns -- confirm
     against ``DBHelper.vertify_and_insert``.
     """
     if not isinstance(item, MofcomItem):
         return item
     DBHelper().vertify_and_insert(
         item, "strawberry_price", "date", "market")
     return item
Esempio n. 10
0
# -*- coding: utf-8 -*-
import datetime

import scrapy
from datetime import timedelta, datetime
from agriculture_demo.items import NatescItem
from agriculture_demo.dbhelper import DBHelper

# Crawl window for the pest and disease news data:
#   1. If the database holds no rows yet, crawl the last year.
#   2. Otherwise crawl from the newest time stored in the database up to now.
get_today = datetime.now()
dbhelper = DBHelper()
date = dbhelper.get_latest_date("pest_news")
if date is None:
    # Nothing stored yet: fall back to a 365-day look-back.
    starttime = datetime.today() - timedelta(days=365)
else:
    starttime = date
print(starttime)


class SpiderNatescSpider(scrapy.Spider):
    name = 'spider_natesc'
    allowed_domains = ['natesc.org.cn']
    start_urls = ['https://www.natesc.org.cn/sites/cb/List_28092_151760.html']

    def parse(self, response):
        c = (get_today - starttime).days
        result = response.xpath(
Esempio n. 11
0
# -*- coding: utf-8 -*-
from datetime import timedelta, datetime
import scrapy
from agriculture_demo.items import MofcomItem
from province_city import get_province
from agriculture_demo.dbhelper import DBHelper

# Module-level crawl state. These names are already module globals, so no
# `global` statement is needed at this scope (it is a no-op here).
yesterday = datetime.today() - timedelta(days=1)
page = 1
dbhelper = DBHelper()
date = dbhelper.get_latest_date("strawberry_price")
if date is None:
    # Nothing stored yet for "strawberry_price": fall back to a 90-day
    # look-back.
    starttime = datetime.today() - timedelta(days=90)
else:
    starttime = date
# print(starttime)
headers = {
    'Accept':
    'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Encoding':
    'gzip, deflate, sdch, br',
    'Accept-Language':
    'zh-CN,zh;q=0.8',
    'Connection':
    'keep-alive',
    'Host':
    'nc.mofcom.gov.cn',
    'Upgrade-Insecure-Requests':
    '1',
    'User-Agent':
Esempio n. 12
0
# -*- coding: utf-8 -*-
import scrapy
import datetime
from datetime import timedelta
from agriculture_demo.items import AppleAgronetItem
from province_city import get_province

# Crawl window for the apple price data:
#   1. If the database is empty, crawl the past year up to today.
#   2. Otherwise crawl from the newest date stored in the database up to
#      the current date.
get_today = datetime.datetime.now()
from agriculture_demo.dbhelper import DBHelper

dbhelper = DBHelper()
date = dbhelper.get_latest_date("apple_price")
if date is None:
    # `datetime` is the module in this file (`import datetime`), so the
    # class must be reached as `datetime.datetime`; the bare
    # `datetime.today()` raised AttributeError whenever the table was empty.
    starttime = datetime.datetime.today() - timedelta(days=365)
else:
    starttime = date


class SpiderAgronetSpider(scrapy.Spider):
    name = 'spider_agronet'
    allowed_domains = ['agronet.com.cn']
    start_urls = ['http://www.agronet.com.cn/Price/List?page=1&siteID=7']

    def parse(self, response):
        c = (get_today - starttime).days
        items = response.xpath("//ul[@class='price_table']//li[position()>1]")