コード例 #1
0
def _get_data(date, dateType, province, k, v):
    """Fetch and decode the trend data for one date/province/item combination.

    Args:
        date: Key looked up in ``dateType`` to fill the ``days`` URL field.
        dateType: Mapping queried with ``.get(date)``; an unknown key yields
            ``None``, which is formatted into the URL unchanged.
        province: Province name; GBK-encoded and passed through
            ``parse.quote`` before being placed in the URL.
        k: Item label, used in the progress message and echoed in the result.
        v: Value substituted for the ``pid`` URL field.

    Returns:
        A tuple ``(date, province, k, payload)`` where ``payload`` is the
        response text parsed with ``json.loads``.
    """
    print("crawling {} {} {}".format(date, province, k))

    # Resolve the template first so a missing 'trend_data' entry fails
    # before any encoding work is attempted.
    template = urls.get('trend_data')
    endpoint = template.format(
        province=parse.quote(province.encode('gbk')),
        days=dateType.get(date),
        pid=v,
    )

    response = bot.get(endpoint)
    payload = json.loads(response.text)
    return date, province, k, payload
コード例 #2
0
import sys
sys.path.append("./")
from spider.request_factory import bot, urls, get_input
from bs4 import BeautifulSoup
from urllib import parse
from pprint import pprint
import json
import pandas as pd
import matplotlib.pyplot as plt
# --- Static configuration -------------------------------------------------
# Commodity labels and region names, kept exactly as written.
category = [
    '外三元',
    '内三元',
    '土杂猪',
    '玉米',
    '豆粕',
]
cities = [
    '北京市', '上海市', '天津市', '重庆市', '广东省', '福建省',
    '浙江省', '江苏省', '山东省', '辽宁省', '江西省', '四川省',
    '陕西省', '湖北省', '河南省', '河北省', '山西省', '内蒙古',
]

# Named look-back windows (presumably a number of days each).
dateType = {
    'month': 30,
    'year': 365,
}
# Hard-coded choice; the interactive alternative was
# get_input(dateType, 'dateType').
days = 'year'

# Hard-coded to the first configured region; the interactive alternative
# was get_input(provinces, 'province').
province = cities[0]

# --- Fetch and parse the trend page ---------------------------------------
res = bot.get(url=urls.get('trend'))
bs = BeautifulSoup(res.text, "html.parser")

# Text of the first "#pri_province" element, split into one entry per line
# after dropping surrounding newlines and the "更多" entry.
provinces = (
    bs.select("#pri_province")[0]
    .text
    .strip('\n')
    .replace("更多\n", "")
    .split('\n')
)

# Map each chart-type <div>'s text to its "value" attribute (None if absent).
items = {}
for item in bs.select(".charts_type > div"):
    items[item.text] = item.attrs.get('value')

# Interactive alternative for picking an entry from `items`:
# item = get_input(items, 'item')
コード例 #3
0
import sys
sys.path.append("./")
from spider.request_factory import bot, urls, get_input
from bs4 import BeautifulSoup
from urllib import parse
from pprint import pprint
import json
import pandas as pd
import matplotlib.pyplot as plt
# --- Static configuration -------------------------------------------------
# Values sent as the `pid` field of the trend_data URL. An earlier, disabled
# version of this list held six display names instead (roughly: foreign
# three-way-cross hog, domestic three-way-cross hog, local mixed-breed hog,
# corn, soybean meal, pork carcass) — presumably in the same order as these
# ids; TODO confirm the mapping against the site.
category = ['19', '22', '20', '8', '9', '10']
cities = [
    '北京市', '上海市', '天津市', '重庆市', '广东省', '福建省',
    '浙江省', '江苏省', '山东省', '辽宁省', '江西省', '四川省',
    '陕西省', '湖北省', '河南省', '河北省', '山西省', '内蒙古',
]

# Named look-back windows; the selected value fills the URL's `days` field.
dateType = {'month': 30, 'year': 365}
days = 'year'

# Query the first configured region.
province = cities[0]

# --- Fetch and parse the trend page ---------------------------------------
res = bot.get(url=urls.get('trend'))
bs = BeautifulSoup(res.text, "html.parser")

# Text of the first "#pri_province" element, split into one entry per line
# after dropping surrounding newlines and the "更多" entry.
provinces = (
    bs.select("#pri_province")[0]
    .text
    .strip('\n')
    .replace("更多\n", "")
    .split('\n')
)

# --- Request trend data for the first category ----------------------------
# (A disabled line previously bound `item = category[0]`; the id is now
# passed inline.) The province is GBK-encoded before percent-quoting.
res1 = bot.get(
    urls.get('trend_data').format(
        province=parse.quote(province.encode('gbk')),
        days=dateType.get(days),
        pid=category[0],
    ),
)