def __init__(self):
    """Store the header dict and load the city and type lookups.

    ``source`` is not defined inside this method; it is resolved from
    the surrounding module scope and handed to ``inquire`` together with
    the ``CityAuction`` / ``TypeAuction`` names.
    """
    user_agent = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.117 Safari/537.36'
    referer = 'https://ningbo.51zhupai.com/house'

    # Key order (User-Agent, then Referer) mirrors the original literal.
    # NOTE(review): presumably used as HTTP request headers - confirm
    # against the methods that read ``self.headers``.
    self.headers = {'User-Agent': user_agent, 'Referer': referer}

    # The city lookup is fetched before the type lookup, as before.
    self.city_code = inquire(CityAuction, source)
    self.map = inquire(TypeAuction, source)
def __init__(self):
    """Store the header dict and load the type and city lookups.

    ``source`` is not defined inside this method; it is resolved from
    the surrounding module scope and passed as the second argument to
    both ``inquire`` calls.
    """
    referer = 'http://www1.rmfysszc.gov.cn/projects.shtml?dh=3&gpstate=1&wsbm_slt=1'
    user_agent = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.117 Safari/537.36'

    # Referer is inserted ahead of User-Agent, matching the original
    # literal's key order.
    self.headers = {'Referer': referer, 'User-Agent': user_agent}

    # The type lookup is fetched before the city lookup, as before.
    self.type_list = inquire(TypeAuction, source)
    self.city_list = inquire(CityAuction, source)
def __init__(self):
    """Store the header dict, an empty ``list_info`` and the type lookup.

    ``source`` is not defined inside this method; it is resolved from
    the surrounding module scope and passed to ``inquire``.
    """
    user_agent = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.117 Safari/537.36'
    self.headers = {'User-Agent': user_agent}

    # Starts out empty; whatever fills it lives outside this method.
    self.list_info = []

    self.type_list = inquire(TypeAuction, source)
def __init__(self):
    """Store the start URL, header dict, default form data and lookups.

    Unlike a module-level ``source`` name, the ``inquire`` calls here
    receive the literal ``'chinesesfpm'`` as their second argument.
    """
    self.start_url = 'http://www.chinesesfpm.com/index/index/getAjaxSearch.html'

    user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36'
    self.headers = {'User-Agent': user_agent}

    # Default values, one key per line, in the original insertion order.
    # NOTE(review): presumably the form payload sent to ``start_url`` -
    # confirm against the method that reads ``self.data``.
    self.data = {
        'court_sheng': None,
        'court_city': None,
        'court_arer': None,
        'province': None,
        'city': 0,
        'min_price': None,
        'max_price': None,
        'do_paimai': 1,
        'do_s_type': None,
        'biaodi_type': 3,
        'do_isajax': 1,
        'page': 1,
        'do_label': 0,
    }

    # Same literal for both lookups; the type lookup is fetched first.
    site_key = 'chinesesfpm'
    self.map = inquire(TypeAuction, site_key)
    self.data_list = inquire(CityAuction, site_key)
import datetime import yaml import re import math from sql_mysql import inquire, TypeAuction setting = yaml.load(open('config.yaml')) client = Mongo( host=setting['mongo']['host'], port=setting['mongo']['port'], ).connect coll = client[setting['mongo']['db']][setting['mongo']['collection']] source = 'jingdong' log = LogHandler(__name__) type_list = inquire(TypeAuction, source) s = requests.session() class Jingdong: def __init__(self): self.headers = { 'User-Agent': "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.117 Safari/537.36", } self.count = 0 def start_crawler(self): for type_num in type_list: page_num = self.get_page(type_num.code) for page in range(1, int(page_num) + 1):