import requests
from bs4 import BeautifulSoup
from pathlib import Path
import urllib.parse
import time

# URL of the burger menu page
load_url = "https://www.mcdonalds.co.jp/menu/burger/"
html = requests.get(load_url)
soup = BeautifulSoup(html.content, "html.parser")

# Create the save folder
save_folder = Path("download_hamburger_img")
save_folder.mkdir(exist_ok=True)

# Search for img tags
for element in soup.find_all("img"):
    src = element.get("src")

    # Build an absolute URL and fetch the image
    image_url = urllib.parse.urljoin(load_url, src)
    imgdata = requests.get(image_url)

    # Save the image data
    filename = image_url.split("/")[-1]
    save_path = save_folder.joinpath(filename)
    with open(save_path, mode="wb") as f:
        f.write(imgdata.content)
    time.sleep(0.5)  # be polite to the server
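A slightly more defensive version of the same download loop may be worth noting. This is a minimal sketch reusing the soup, load_url, and save_folder names defined above; the src guard and the status-code check are additions, not part of the original.

# Sketch: defensive variant of the loop above (reuses soup, load_url, save_folder)
for element in soup.find_all("img"):
    src = element.get("src")
    if not src:
        continue  # skip img tags without a src attribute
    image_url = urllib.parse.urljoin(load_url, src)
    imgdata = requests.get(image_url)
    if imgdata.status_code != 200:
        continue  # don't save error pages as image files
    save_folder.joinpath(image_url.split("/")[-1]).write_bytes(imgdata.content)
    time.sleep(0.5)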
import requests
from bs4 import BeautifulSoup

def isNeedInfo(n):
    # Keep only the nutrient rows we care about:
    # エネルギー (energy), 食物繊維 (dietary fiber), たんぱく質 (protein),
    # 脂質 (fat), 炭水化物 (carbohydrates)
    if (n == "エネルギー" or n == "食物繊維" or n == "たんぱく質"
            or n == "脂質" or n == "炭水化物"):
        return True
    return False

data_col = [
    "size", "name", "price", "calorie", "protein", "fat",
    "carbohydrate", "dietary_fiber",
    "isSetMain", "isSetSide", "isSetDrink"
]

# URL of a single product page
base_url = "https://www.mcdonalds.co.jp"
load_url = "https://www.mcdonalds.co.jp/products/6130/"
html = requests.get(load_url)
soup = BeautifulSoup(html.content, "lxml")

data_list = []
data_list.append(None)  # size

# picture / name
swp = soup.find("picture", class_="block")
img = swp.find("img")
name = img.get("alt")
print(name)
data_list.append(name)
# print(img.get("src"))

# price
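isNeedInfo is defined above but never called in this fragment. Here is a minimal sketch of the intended use, assuming the nutrition facts sit in an HTML table with the nutrient label in the first cell; the real markup on the product page may differ.

# Sketch: filtering nutrition rows with isNeedInfo (the tr/td layout is an assumption)
for tr in soup.find_all("tr"):
    cells = tr.find_all("td")
    if len(cells) >= 2 and isNeedInfo(cells[0].get_text(strip=True)):
        print(cells[0].get_text(strip=True), cells[1].get_text(strip=True))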
import phpsap
import requests as rq

apiKey = 'your_api_key'       # placeholder
username = '******'
PhoneNumber = 'phone_number'  # placeholder
Amount = 'amount'             # placeholder

b2c_payload = dict(username=username, apiKey=apiKey,
                   PhoneNumber=PhoneNumber, Amount=Amount)
b2c_request = rq.get(phpsap.mobileb2c_url, json=b2c_payload,
                     headers=phpsap.headers)
print(b2c_request.text)
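Since the snippet prints the raw response body, a minimal sketch of slightly safer handling follows, assuming the endpoint returns JSON on success; the structure of that JSON is not specified here.

# Sketch: basic response handling (a JSON body on success is an assumption)
if b2c_request.status_code == 200:
    print(b2c_request.json())
else:
    print("Request failed:", b2c_request.status_code, b2c_request.text)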
import requests
from bs4 import BeautifulSoup

def isNeedInfo(n):
    # Same nutrient filter as in the previous snippet
    if (n == "エネルギー" or n == "食物繊維" or n == "たんぱく質"
            or n == "脂質" or n == "炭水化物"):
        return True
    return False

data_col = [
    "size", "name", "price", "calorie", "protein", "fat",
    "carbohydrate", "dietary_fiber",
    "isSetMain", "isSetSide", "isSetDrink"
]

base_url = "https://www.mcdonalds.co.jp"
set_datas_list = []
set_datas_list.append(data_col)  # header row

# URL of the burger menu list page
load_url = "https://www.mcdonalds.co.jp/menu/burger/"
html = requests.get(load_url)
soup = BeautifulSoup(html.content, "lxml")

# Collect the <a> tags that link to each product's detail page
detail_goods_atag_list = soup.find_all("a", class_="inline-block")
for atag in detail_goods_atag_list:
    url = atag.get("href")
    html = requests.get(base_url + url)
    soup = BeautifulSoup(html.content, "lxml")
    # print(soup)

    data_list = []
    data_list.append(None)  # size

    # picture / name
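The loop above breaks off before any row is stored. A minimal sketch of a plausible tail follows, assuming each finished data_list is appended to set_datas_list and the table is then written out with pandas; the filename and the use of pandas here are assumptions.

# Sketch: append each row inside the loop, then write the table (assumes pandas as pd)
#     set_datas_list.append(data_list)  # at the end of each loop iteration
df = pd.DataFrame(set_datas_list[1:], columns=set_datas_list[0])
df.to_csv("mcdonalds_menu.csv", index=False, encoding="utf-8-sig")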
import requests
from bs4 import BeautifulSoup
import time
import os
import pandas as pd
import codecs
from urllib.parse import urljoin

data_col = ["information1", "information2"]
base_url = "https://www.mcdonalds.co.jp"
dynamic_url = "https://www.mcdonalds.co.jp/menu/burger/"

res = requests.get(dynamic_url)
# res.raise_for_status()
# html = BeautifulSoup(res.text, 'lxml')
# print(html)

while True:
    res = requests.get(dynamic_url)
    res.raise_for_status()
    html = BeautifulSoup(res.text, 'lxml')

    # Placeholders: replace with the actual tags/selectors on the target page
    detail_url_list = html.find_all("html tag containing the URLs you want")  # placeholder
    next_page = html.find("html tag containing the next-page URL")  # placeholder

    for i in range(len(detail_url_list)):
        res2 = requests.get(urljoin(base_url, detail_url_list[i].a.get("href")))
        res2.raise_for_status()
        html2 = BeautifulSoup(res2.text, 'lxml')
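As written, the while True loop never terminates. A minimal sketch of the usual pagination tail follows, assuming next_page wraps an <a> tag whose href points at the next listing page; it belongs at the end of the loop body.

    # Sketch: stop when there is no next page, otherwise follow it
    if next_page is None or next_page.a is None:
        break
    dynamic_url = urljoin(base_url, next_page.a.get("href"))
    time.sleep(1)  # pause between listing pages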