def __init__(self, page):
    """Set up the scraper state for one JD search result page.

    The search keyword is hard-coded in the URLs in percent-encoded form
    (it decodes to "裤子", i.e. trousers).

    Args:
        page: Page number of the search results. It is appended to the
            search URL via ``str(page)`` and must support ``page + 1``.
    """
    search_base = (
        'https://search.jd.com/Search'
        '?keyword=%E8%A3%A4%E5%AD%90&enc=utf-8&qrst=1&rt=1&stop=1&vt=2'
        '&offset=5&wq=%E8%A3%A4%E5%AD%90&page='
    )
    user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'

    # URL of the search result page itself.
    self.url = search_base + str(page)
    self.headers = {'User-Agent': user_agent}

    # Template with two positional slots: {0} is the page number and
    # {1} is the value of the `show_items` query parameter.
    self.search_urls = (
        'https://search.jd.com/s_new.php'
        '?keyword=%E8%A3%A4%E5%AD%90&enc=utf-8&qrst=1&rt=1&stop=1&vt=2'
        '&offset=3&wq=%E8%A3%A4%E5%AD%90'
        '&page={0}&s=26&scrolling=y&pos=30&show_items={1}'
    )

    # All item ids found on the page; they are used to build the URL for
    # the remaining 30 images. A set gives de-duplication for free.
    self.pids = set()
    # Every image URL collected so far.
    self.img_urls = set()
    # Used for paging.
    self.search_page = page + 1
    # Database persistence helper.
    self.sql = save_mysql()
def __init__(self, url, id, soup, referer):
    """Initialise the state for a single Taobao item.

    Args:
        url: URL prefix; the item address is ``url + id``.
        id: Item identifier. The name shadows the ``id`` builtin but is
            kept because it is part of the constructor's signature.
        soup: Parsed page object, stored unchanged as ``pageSoup``.
        referer: Value sent as the ``referer`` request header.
    """
    user_agent = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/64.0.3282.186 Safari/537.36"
    )

    self.platform = "淘宝"
    self.id = id
    # Plain concatenation: `url` and `id` must be compatible with `+`.
    self.address = url + id
    self.headers = {
        "User-Agent": user_agent,
        "referer": referer,
    }
    self.pageSoup = soup
    # Not known at construction time; starts out as None.
    self.description = None
    # Object returned by save_mysql(); presumably the database
    # persistence helper -- confirm against its definition.
    self.sql = save_mysql()
def __init__(self, page):
    """Set up the scraper state for one JD search result page.

    The search keyword "空调" (air conditioner) is hard-coded in the URL.

    Args:
        page: Page number of the search results. It is appended to the
            search URL via ``str(page)`` and must support ``page + 1``.
    """
    search_base = (
        'https://search.jd.com/Search'
        '?keyword=空调&enc=utf-8&qrst=1&rt=1&stop=1&vt=2'
        '&suggest=1.def.0.V16&wq=kongt&cid2=794&cid3=870&stock=1&page='
    )
    user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'

    # URL of the search result page itself.
    self.url = search_base + str(page)
    self.headers = {'User-Agent': user_agent}

    # NOTE: a `search_urls` template (the same query followed by
    # `page={0}&s=26&scrolling=y&pos=30&show_items={1}`) was commented out
    # in the original, so that attribute is not defined on this instance.

    # All item ids found on the page; they are used to build the URL for
    # the remaining 30 images. A set gives de-duplication for free.
    self.pids = set()
    self.product_urls = set()
    # Every image URL collected so far.
    self.img_urls = set()
    # Used for paging.
    self.search_page = page + 1
    # Database persistence helper.
    self.sql = save_mysql()