Example #1
0
 def __init__(self, page):
     """Set up the crawl state for one JD search-result page.

     The search keyword is hard-coded in percent-encoded form inside the
     URLs (it decodes to the Chinese word for "trousers").

     Args:
         page: Page number. It is stringified onto the end of the search
             URL, and ``page + 1`` is stored as ``search_page``.
     """
     page_suffix = str(page)
     first_screen_url = 'https://search.jd.com/Search?keyword=%E8%A3%A4%E5%AD%90&enc=utf-8&qrst=1&rt=1&stop=1&vt=2&offset=5&wq=%E8%A3%A4%E5%AD%90&page='
     self.url = first_screen_url + page_suffix

     request_headers = {}
     request_headers['User-Agent'] = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
     self.headers = request_headers

     # Template with two placeholders: {0} is the page value and {1} is the
     # ``show_items`` value; it is filled in elsewhere.
     self.search_urls = (
         'https://search.jd.com/s_new.php?keyword=%E8%A3%A4%E5%AD%90&enc=utf-8&qrst=1&rt=1&stop=1&vt=2&offset=3&wq=%E8%A3%A4%E5%AD%90'
         '&page={0}&s=26&scrolling=y&pos=30&show_items={1}'
     )

     # pids: every product id found on the page, used to build the URL for
     #       the remaining 30 images; a set gives de-duplication for free.
     # img_urls: every image URL collected so far.
     self.pids, self.img_urls = set(), set()

     # Used for paging.
     self.search_page = page + 1

     # Database persistence helper (save_mysql is defined outside this view).
     self.sql = save_mysql()
Example #2
0
	def __init__(self, url, id, soup, referer):
		"""Store the identifying data and request settings for one item page.

		Args:
			url: Prefix that ``id`` is concatenated onto to form ``address``.
			id: Item identifier; kept as ``self.id`` and appended to ``url``.
			soup: Parsed page object, stored untouched as ``pageSoup``
				(presumably a BeautifulSoup tree - confirm against the caller).
			referer: Value sent in the ``referer`` request header.
		"""
		browser_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36"

		self.platform = "淘宝"
		self.id = id
		self.address = url + id

		# Request headers: a desktop Chrome user agent plus the caller's referer.
		request_headers = {"User-Agent": browser_agent}
		request_headers["referer"] = referer
		self.headers = request_headers

		self.pageSoup = soup
		# No description yet; the attribute starts out as None.
		self.description = None
		# Database persistence helper (save_mysql is defined outside this view).
		self.sql = save_mysql()
Example #3
0
 def __init__(self, page):
     """Set up the crawl state for one JD search-result page.

     The search keyword and category filters are hard-coded in the URL.

     Args:
         page: Page number. It is stringified onto the end of the search
             URL, and ``page + 1`` is stored as ``search_page``.
     """
     page_suffix = str(page)
     listing_url = 'https://search.jd.com/Search?keyword=空调&enc=utf-8&qrst=1&rt=1&stop=1&vt=2&suggest=1.def.0.V16&wq=kongt&cid2=794&cid3=870&stock=1&page='
     self.url = listing_url + page_suffix

     request_headers = {}
     request_headers['User-Agent'] = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
     self.headers = request_headers

     # NOTE: a ``search_urls`` template (same query plus
     # ``page={0}&s=26&scrolling=y&pos=30&show_items={1}``) used to be set
     # here; it is currently disabled, so this object has no such attribute.

     # pids: every product id found on the page, used to build the URL for
     #       the remaining 30 images; a set gives de-duplication for free.
     # product_urls: starts as an empty set.
     # img_urls: every image URL collected so far.
     self.pids, self.product_urls, self.img_urls = set(), set(), set()

     # Used for paging.
     self.search_page = page + 1

     # Database persistence helper (save_mysql is defined outside this view).
     self.sql = save_mysql()