def pconedetect():
    """Report Pcone product IDs that are missing from the spreadsheet.

    Fetches the merchant product list from the Pcone API, reads column O of
    the '商品ID' sheet through ``getsheet`` and inserts every ``display_id``
    that does not appear in that column into ``output`` (one per line),
    followed by a completion message.

    Only the single page requested by the URL (up to 1000 items) is checked.
    """
    url = "https://www.pcone.com.tw/api/merchant/products?items_per_page=1000&merchant_id=2945567&page=1"
    my_headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'}

    # A timeout keeps a stalled connection from blocking the caller forever.
    response = requests.get(url, headers=my_headers, timeout=30)
    data = json.loads(response.text)  # convert the JSON body to a Python dict
    listed_ids = [product["display_id"] for product in data["products"]]

    # First cell of every sheet row; a row without cells counts as "".
    sheet_rows = getsheet('商品ID!O:O')
    known_ids = {row[0] if row else "" for row in sheet_rows}

    # IDs listed on the site but absent from the sheet, in API order.
    missing_ids = [pid for pid in listed_ids if pid not in known_ids]

    # Every insert goes to position 1.0, so the newest line ends up on top.
    for pid in missing_ids:
        output.insert(1.0, str(pid) + "\n")
    output.insert(1.0, "偵測完成\n")
def getALLpcd():
    """Rewrite column V of the '商品ID' sheet with stock values from ``getpcd``.

    Reads columns K:L (product ID, style ID) through ``getsheet`` and builds
    one single-cell row per sheet row:

    * blank row                          -> ""
    * empty product ID                   -> "商品ID錯誤"
    * lookup raised an error             -> "錯誤或下架"
    * no entry matches the style ID      -> "款式ID錯誤"
    * otherwise                          -> quantity of the last matching entry

    The first row is then overwritten with a dated header, column V is
    cleared with ``delsheet`` and the rows are written with ``writesheet``.
    When the sheet has no rows the function returns without touching it
    (previously this raised IndexError on the header assignment).
    """
    prdidData = getsheet('商品ID!K:L')  # all rows of the ID columns
    if not prdidData:
        return

    total = len(prdidData)
    # Every row starts as a one-cell list so the payload is uniformly
    # list-of-lists and the header assignment below always works.
    wrval = [[""] for _ in prdidData]

    for row, cells in enumerate(prdidData):
        print(str(row) + "/" + str(total))  # progress
        if not cells:
            continue  # blank sheet row stays blank
        product_id = cells[0]
        if not product_id:
            wrval[row] = ["商品ID錯誤"]
            continue
        try:
            time.sleep(0.5)  # the site blocks rapid bulk reads
            gotstock = getpcd(product_id)  # data scraped from the product page
            # Each entry is indexed as [0] style ID, [1] quantity.  cells[1]
            # is evaluated lazily so a missing style ID only matters when
            # there is at least one entry to compare against.
            quantities = [
                entry[1] for entry in gotstock["prd"] if entry[0] == cells[1]
            ]
        except Exception:
            # Exception (not a bare except) so Ctrl-C can still stop the run.
            wrval[row] = ["錯誤或下架"]
            continue
        # The last matching entry wins, as in the original overwrite loop.
        wrval[row] = [quantities[-1]] if quantities else ["款式ID錯誤"]

    wrval[0][0] = "PC梓原" + time.strftime("%m/%d", time.localtime())
    delsheet("商品ID!V:V")
    writesheet("商品ID!V1", wrval)
    print("OK")
def getALLpcone():
    """Rewrite column Q of the '商品ID' sheet with stock values from ``getpcone``.

    Reads columns O:P (product ID, style ID) through ``getsheet`` and builds
    one single-cell row per sheet row:

    * blank row or empty product ID      -> ""
    * lookup raised an error             -> "錯誤或下架"
    * no entry matches the style ID      -> "款式ID錯誤"
    * otherwise                          -> quantity of the last matching entry

    The first row is then overwritten with a dated header, column Q is
    cleared with ``delsheet`` and the rows are written with ``writesheet``.
    When the sheet has no rows the function returns without touching it
    (previously this raised IndexError on the header assignment).
    """
    prdidData = getsheet('商品ID!O:P')  # all rows of the ID columns
    if not prdidData:
        return

    total = len(prdidData)
    # Rows used to start as the bare string "" and stayed that way when the
    # product ID was empty, which mixed strings and lists in the payload and
    # made the header assignment fail with TypeError.  Start every row as a
    # one-cell list instead.
    wrval = [[""] for _ in prdidData]

    for row, cells in enumerate(prdidData):
        print(str(row) + "/" + str(total))  # progress
        if not cells or not cells[0]:
            continue  # no product ID: leave the cell blank
        try:
            gotstock = getpcone(cells[0])
            # Each entry is indexed as [0] style ID, [1] quantity.  cells[1]
            # is evaluated lazily so a missing style ID only matters when
            # there is at least one entry to compare against.
            quantities = [
                entry[1] for entry in gotstock["prd"] if entry[0] == cells[1]
            ]
        except Exception:
            # Exception (not a bare except) so Ctrl-C can still stop the run.
            wrval[row] = ["錯誤或下架"]
            continue
        # The last matching entry wins, as in the original overwrite loop.
        wrval[row] = [quantities[-1]] if quantities else ["款式ID錯誤"]

    wrval[0][0] = "Pcone" + time.strftime("%m/%d", time.localtime())
    delsheet("商品ID!Q:Q")
    writesheet("商品ID!Q1", wrval)
    print("OK")
def pcddetect():
    """Report pcstore seller-page product IDs that are missing from the sheet.

    Walks the seller product list (50 items per page, at most 199 pages),
    collects the product ID embedded in each ``a.hotProdList`` link, then
    inserts every ID that does not appear in column M of the '商品ID' sheet
    into ``output``, followed by a completion message.
    """
    # Cookie copied from a logged-in browser session; the listing pages are
    # only reachable when logged in.
    # NOTE(review): this is a hard-coded credential that will expire -
    # consider loading it from configuration.  The original author also
    # marked this spot as needing further investigation.
    cookie_str = r'cbj=IqhLsP6..LOKMq6kVcXtnWgBVcXtnqojj'
    my_headers = {
        'user-agent':
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'
    }

    # Turn the "k=v;k=v" cookie string into a dict once, outside the loop.
    cookies = {}
    for pair in cookie_str.split(';'):
        key, value = pair.split('=', 1)
        cookies[key] = value

    allid = []
    # One session for the whole crawl: connections are reused and the
    # session is closed on exit (the original opened a new, never-closed
    # session per page).  Cookies set by the server now persist across
    # pages, as they would in a browser.
    with requests.Session() as session:
        for page in range(1, 200):
            url = ('http://seller.pcstore.com.tw/S188431702/plist_dt.htm?s=S188431702&c=&skw=&pg='
                   + str(page) + '&sr=1&pp=50')
            # A timeout keeps a stalled connection from blocking forever.
            res = session.get(url, cookies=cookies, headers=my_headers,
                              timeout=30)
            soup = BeautifulSoup(res.text, "html.parser")
            links = soup.find_all('a', 'hotProdList')
            for link in links:
                # href looks like "/S188431702/<id>.htm"; keep only <id>.
                allid.append(
                    link['href'].replace('/S188431702/', '').replace('.htm', ''))
            if len(links) < 50:
                break  # a short page is the last page

    output.insert(1.0, "\n")

    # First cell of every non-empty row in the sheet's ID column.
    known_ids = {row[0] for row in getsheet('商品ID!M:M') if row}

    # Report site IDs that the sheet does not contain.
    for listing_id in allid:
        if listing_id not in known_ids:
            output.insert(1.0, str(listing_id) + "\n")
    output.insert(1.0, "偵測完成\n")
def yahoodetect():
    """Report Yahoo booth item IDs that are missing from the spreadsheet.

    Walks the booth listing pages (at most 199), collecting the ``data-mid``
    attribute of every ``div.item-wrap``.  Paging stops at the first page
    with no items or at the first error.  Every collected ID that does not
    appear in column E of the '商品ID' sheet is inserted into ``output``,
    followed by a completion message.
    """
    my_headers = {
        'user-agent':
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'
    }

    allid = []
    for page in range(1, 200):
        try:
            output.insert(1.0, ".")  # progress marker, one dot per page
            url = ("https://tw.bid.yahoo.com/booth/Green-Forest-Y3489416698?bfe=1&page="
                   + str(page))
            # A timeout keeps a stalled connection from blocking forever.
            gotrq = requests.get(url, headers=my_headers, timeout=30)
            soup = BeautifulSoup(gotrq.text, "html.parser")
            items = soup.find_all('div', 'item-wrap')
            if not items:  # no more items: done paging
                break
            for item in items:
                allid.append(item['data-mid'])
        except Exception:
            # Any failure ends paging; IDs gathered so far are still checked.
            # Exception (not a bare except) so Ctrl-C is not swallowed.
            print("end")
            break

    output.insert(1.0, "\n")

    # First cell of every non-empty row in the sheet's ID column.
    known_ids = {row[0] for row in getsheet('商品ID!E:E') if row}

    # Report site IDs that the sheet does not contain.
    for listing_id in allid:
        if listing_id not in known_ids:
            output.insert(1.0, str(listing_id) + "\n")
    output.insert(1.0, "偵測完成\n")
def rutendetect():
    """Report Ruten store item IDs that are missing from the spreadsheet.

    Walks the store listing pages (at most 199), collecting the ``name``
    attribute of every ``div`` with class
    ``rt-product-tag-container tagging-class`` without duplicates.  Paging
    stops after the first page that has no ``div.item-img-wrap``.  Every
    collected ID that does not appear in column G of the '商品ID' sheet is
    inserted into ``output``, followed by a completion message.
    """
    my_headers = {
        'user-agent':
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'
    }

    allid = []
    seen = set()  # mirrors allid for constant-time duplicate checks
    for page in range(1, 200):
        output.insert(1.0, ".")  # progress marker, one dot per page
        url = ("http://class.ruten.com.tw/user/index00.php?s=ting865290&p="
               + str(page))
        # A timeout keeps a stalled connection from blocking forever.
        gotrq = requests.get(url, headers=my_headers, timeout=30)
        soup = BeautifulSoup(gotrq.text, "html.parser")
        tags = soup.find_all('div', 'rt-product-tag-container tagging-class')
        end = soup.find_all("div", class_="item-img-wrap")
        for tag in tags:
            item_id = tag['name']
            if item_id not in seen:  # add only IDs not collected yet
                seen.add(item_id)
                allid.append(item_id)
        if not end:
            break  # page shows no item images: past the last page

    output.insert(1.0, "\n")

    # First cell of every non-empty row in the sheet's ID column.
    known_ids = {row[0] for row in getsheet('商品ID!G:G') if row}

    # Report site IDs that the sheet does not contain.
    for listing_id in allid:
        if listing_id not in known_ids:
            output.insert(1.0, str(listing_id) + "\n")
    output.insert(1.0, "偵測完成\n")
def getALLRuten():
    """Rewrite column T of the '商品ID' sheet with stock values from ``getruten``.

    Reads columns G:H (product ID, style ID) through ``getsheet`` and builds
    one single-cell row per sheet row:

    * blank row or empty product ID      -> ""
    * lookup raised an error, or the row
      has no style ID to compare         -> "商品ID錯誤或無款式ID"
    * ``prd`` is a string (no styles)    -> that string
    * matching style is closed ("N")     -> "款式關閉"
    * no entry matches the style ID      -> "款式ID錯誤"
    * otherwise                          -> quantity of the last matching entry
    * ``state`` equals True              -> "下架" (overrides the above)

    The first row is then overwritten with a dated header, column T is
    cleared with ``delsheet`` and the rows are written with ``writesheet``.

    Returns:
        The list of single-cell rows that was written.  When the sheet has
        no rows an empty list is returned and the sheet is left untouched
        (previously this raised IndexError on the header assignment).
    """
    prdidData = getsheet('商品ID!G:H')  # all rows of the ID columns
    wrval = [[""] for _ in prdidData]  # one output row per sheet row
    if not wrval:
        return wrval

    total = len(prdidData)
    for row, cells in enumerate(prdidData):
        print(str(row) + "/" + str(total))  # progress
        if not cells or not cells[0]:
            continue  # no product ID: leave the cell blank
        try:
            gotstock = getruten(cells[0])  # read the product page
            variants = gotstock['prd']
            if isinstance(variants, str):
                # A plain string means the product has no styles.
                wrval[row] = [variants]
            else:
                # Entries are indexed as [0] style ID, [1] quantity,
                # [3] open/closed flag.
                matched = False
                for variant in variants:
                    if variant[0] == cells[1]:  # style ID found
                        matched = True
                        wrval[row] = [variant[1]]
                        if variant[3] == "N":  # this style is closed
                            wrval[row] = ["款式關閉"]
                # The original tested ``wrval[row] == ""`` here, which can
                # never be true for a list, so this message was never
                # written; an explicit flag restores the intended check.
                if not matched:
                    wrval[row] = ["款式ID錯誤"]
            if gotstock["state"] == True:  # noqa: E712 - keep == semantics
                wrval[row] = ["下架"]
        except Exception:
            # Bad product ID, or the row has no style ID column.
            # Exception (not a bare except) so Ctrl-C can still stop the run.
            wrval[row] = ["商品ID錯誤或無款式ID"]

    # The first row carries the dated column header.
    wrval[0][0] = "Ruten" + time.strftime("%m/%d", time.localtime())
    delsheet("商品ID!T:T")  # clear the old column
    writesheet("商品ID!T1", wrval)  # write the new values
    print("OK")  # was unreachable after ``return`` in the original
    return wrval
def getALLYahoo():
    """Rewrite column S of the '商品ID' sheet with stock values from ``getyahoo``.

    Reads columns E:F (product ID, style ID) through ``getsheet`` and builds
    one single-cell row per sheet row:

    * blank row or empty product ID      -> ""
    * lookup raised an error, or the row
      has no style ID to compare         -> "商品ID錯誤或無款式ID"
    * ``prd`` is a string (no styles)    -> that string
    * no entry matches the style ID      -> "款式ID錯誤"
    * otherwise                          -> quantity of the last matching entry
    * ``state`` equals '3'               -> "下架" (overrides the above)

    The first row is then overwritten with a dated header, column S is
    cleared with ``delsheet`` and the rows are written with ``writesheet``.

    Returns:
        The list of single-cell rows that was written.  When the sheet has
        no rows an empty list is returned and the sheet is left untouched
        (previously this raised IndexError on the header assignment).
    """
    prdidData = getsheet('商品ID!E:F')  # all rows of the ID columns
    wrval = [[""] for _ in prdidData]  # one output row per sheet row
    if not wrval:
        return wrval

    total = len(prdidData)
    for row, cells in enumerate(prdidData):
        print(str(row) + "/" + str(total))  # progress
        if not cells or not cells[0]:
            continue  # no product ID: leave the cell blank
        try:
            gotstock = getyahoo(cells[0])
            variants = gotstock['prd']
            if isinstance(variants, str):
                # A plain string means the product has no styles.
                wrval[row] = [variants]
            else:
                # Entries are indexed as [0] style ID, [1] quantity; the
                # last matching entry wins.  cells[1] is evaluated lazily so
                # a missing style ID only matters when there is an entry to
                # compare against.
                quantities = [
                    variant[1] for variant in variants
                    if variant[0] == cells[1]
                ]
                # The original tested ``wrval[row] == ""`` here, which can
                # never be true for a list, so "款式ID錯誤" was never written.
                wrval[row] = ([quantities[-1]] if quantities
                              else ["款式ID錯誤"])
            if gotstock["state"] == '3':
                wrval[row] = ["下架"]
        except Exception:
            # Bad product ID, or the row has no style ID column.
            # Exception (not a bare except) so Ctrl-C can still stop the run.
            wrval[row] = ["商品ID錯誤或無款式ID"]

    # The first row carries the dated column header.
    wrval[0][0] = "Yahoo" + time.strftime("%m/%d", time.localtime())
    delsheet("商品ID!S:S")
    writesheet("商品ID!S1", wrval)
    print("OK")  # was unreachable after ``return`` in the original
    return wrval