def scrape_shoes(key):
    """Scrape the shoes page registered under ``key`` and dump the result.

    Fetches ``URLS.get(key)``, collects every ``<table class="roundy">`` and
    walks the tables at indices 2 through 7. In each of those tables the
    first two ``<tr>`` rows are skipped (presumably header rows - confirm
    against the page layout) and every remaining row becomes one item.

    Args:
        key: Lookup key into ``URLS``; also used as the dump name suffix.

    Returns:
        A dict mapping each item name to its scraped fields. The same dict
        is passed to ``dump_data`` under ``"clothing/" + key``.
    """
    page = requests.get(URLS.get(key), timeout=5)
    soup = BeautifulSoup(page.content, "html.parser")
    tables = soup.find_all("table", {"class": "roundy"})

    items = {}
    for table_index in range(2, 8):
        for row in tables[table_index].find_all("tr")[2:]:
            # Look the cells up once per row instead of once per field.
            cells = row.find_all("td")
            name = cells[0].text.strip()
            item = {
                "name": name,
                "priceBuy": parse_price(cells[2].text),
                "priceSell": parse_price(cells[3].text),
                "source": parse_source(cells[4]),
                "variations": parse_variations(cells[5]),
                "variationImageLinks": get_image_links(
                    cells[5].find_all("img")),
            }
            # "imageLink" is only present when the image cell has a link.
            image_anchors = cells[1].find_all("a")
            if image_anchors:
                item["imageLink"] = image_anchors[0]["href"]
            items[name] = item

    dump_data(items, "clothing/" + key)
    return items
def scrape_furniture_housewares(key):
    """Scrape the housewares page stored at ``URLS["furniture"][key]``.

    Walks the ``<table class="roundy">`` elements at indices 3 through 28
    (one per letter a - z). Tables with three or fewer ``<tr>`` rows are
    treated as empty and skipped; otherwise every row after the first two
    becomes one item keyed by its name.

    NOTE(review): a second function with this same name is defined further
    down in this file; if both live in one module the later definition
    replaces this one - confirm which is intended.

    Args:
        key: Entry name inside ``URLS["furniture"]``; also used as the dump
            name suffix.

    Returns:
        A dict mapping item name to its scraped fields. The same dict is
        passed to ``dump_data`` under ``"furniture/" + key``.
    """
    page = requests.get(URLS["furniture"][key], timeout=5)
    # html.parser does not scrape all html contents
    soup = BeautifulSoup(page.content, "html5lib")
    tables = soup("table", {"class": "roundy"})

    items = {}
    for table_index in range(3, 29):  # a - z
        rows = tables[table_index]("tr")
        if len(rows) <= 3:  # some tables are empty
            continue
        for row in rows[2:]:
            cells = row("td")
            name = cells[1].text.strip()
            items[name] = {
                "image_url": parse_image_url(cells[0]),
                "price": {
                    "buy": parse_price(cells[2].text),
                    "sell": parse_price(cells[3].text),
                },
                "source": parse_source(cells[4]),
                "variations": parse_furniture_variations(cells[5]),
                "customization": parse_customization(cells[6]),
                "size_image_url": parse_image_img_url(cells[7]),
            }

    dump_data(items, "furniture/" + key)
    return items
def scrape_umbrellas(key):
    """Scrape the umbrellas page registered under ``key`` and dump the result.

    Fetches ``URLS.get(key)`` and reads only the third
    ``<table class="roundy">`` (index 2), skipping its first two ``<tr>``
    rows. Every remaining row becomes one item keyed by its name.

    Args:
        key: Lookup key into ``URLS``; also used as the dump name suffix.

    Returns:
        A dict mapping item name to its scraped fields. The same dict is
        passed to ``dump_data`` under ``"clothing/" + key``.
    """
    page = requests.get(URLS.get(key), timeout=5)
    soup = BeautifulSoup(page.content, "html.parser")
    tables = soup.find_all("table", {"class": "roundy"})

    items = {}
    for row in tables[2].find_all("tr")[2:]:
        cells = row.find_all("td")
        name = cells[0].text.strip()
        items[name] = {
            "name": name,
            # Unlike the optional image link elsewhere, every row is
            # expected to carry a link here; a missing one raises.
            "imageLink": cells[1].find_all("a")[0]["href"],
            "source": parse_source(cells[2]),
            "priceBuy": parse_price(cells[3].text),
            "priceSell": parse_price(cells[4].text),
        }

    dump_data(items, "clothing/" + key)
    return items
def scrape_music(key):
    """Scrape the music page registered under ``key`` and dump the result.

    Fetches ``URLS.get(key)`` and reads the first
    ``<table class="article-table">``, skipping its first ``<tr>`` row.
    Items are keyed by their name with spaces and hyphens replaced by
    underscores; the unmodified name is kept in the ``"name"`` field.

    Args:
        key: Lookup key into ``URLS``; also used as the dump name suffix.

    Returns:
        A dict mapping the normalised item key to its scraped fields. The
        same dict is passed to ``dump_data`` under ``"music/" + key``.
    """
    page = requests.get(URLS.get(key), timeout=5)
    soup = BeautifulSoup(page.content, "html.parser")
    tables = soup("table", {"class": "article-table"})

    # Maps both " " and "-" to "_" in a single pass.
    to_underscores = str.maketrans(" -", "__")

    items = {}
    for row in tables[0]("tr")[1:]:
        cells = row("td")
        name = cells[0].text.strip()
        items[name.translate(to_underscores)] = {
            "name": name,
            "image_url": parse_image_url(cells[1]),
            "priceBuy": parse_price(cells[2].text),
            "priceSell": parse_price(cells[3].text),
            "source": parse_source(cells[4]),
        }

    dump_data(items, "music/" + key)
    return items
def scrape_furniture_wallpapers(key):
    """Scrape the wallpapers page stored at ``URLS["furniture"][key]``.

    Reads only the fourth ``<table class="roundy">`` (index 3), skipping
    its first two ``<tr>`` rows. Every remaining row becomes one item keyed
    by the text of its second cell.

    Args:
        key: Entry name inside ``URLS["furniture"]``; also used as the dump
            name suffix.

    Returns:
        A dict mapping item name to its scraped fields. The same dict is
        passed to ``dump_data`` under ``"furniture/" + key``.
    """
    page = requests.get(URLS["furniture"][key], timeout=5)
    # html.parser does not scrape all html contents
    soup = BeautifulSoup(page.content, "html5lib")
    tables = soup("table", {"class": "roundy"})

    items = {}
    for row in tables[3]("tr")[2:]:
        cells = row("td")
        name = cells[1].text.strip()
        items[name] = {
            "image_url": parse_image_url(cells[0]),
            "price": {
                "buy": parse_price(cells[2].text),
                "sell": parse_price(cells[3].text),
            },
            "source": parse_source(cells[4]),
        }

    dump_data(items, "furniture/" + key)
    return items
def scrape_furniture_housewares(key):
    """Scrape the housewares page registered under ``key`` and dump the result.

    Fetches ``URLS.get(key)`` and reads only the fourth
    ``<table class="roundy">`` (index 3), skipping its first two ``<tr>``
    rows. Every remaining row becomes one item keyed by the text of its
    second cell.

    NOTE(review): another function with this same name is defined earlier
    in this file (it reads ``URLS["furniture"][key]`` and emits snake_case
    keys). If both live in one module this later definition is the one
    that stays bound - confirm which is intended.

    Args:
        key: Lookup key into ``URLS``; also used as the dump name suffix.

    Returns:
        A dict mapping item name to its scraped fields. ``"imageLink"`` is
        present only when the first cell of the row contains a link. The
        same dict is passed to ``dump_data`` under ``"furniture/" + key``.

    Raises:
        IndexError: If the page has fewer than four matching tables or a
            row has fewer than seven cells.
        AttributeError: If the seventh cell of a row has no ``<img>``.
    """
    response = requests.get(URLS.get(key), timeout=5)
    soup = BeautifulSoup(response.content, "html.parser")
    tables = soup.find_all("table", {"class": "roundy"})

    items = {}
    for tr in tables[3]("tr")[2:]:
        # Look the cells up once per row instead of once per field.
        cells = tr.find_all("td")
        name = cells[1].text.strip()
        item = {
            "name": name,
            "priceBuy": parse_price(cells[2].text),
            "priceSell": parse_price(cells[3].text),
            "source": parse_source(cells[4]),
            "variations": parse_variations(cells[5]),
            "customization": False,
            "sizeLink": cells[6].img.get("data-src"),
        }
        # The link is read from the first cell, so the guard must inspect
        # that same cell (it previously inspected the name cell instead).
        image_anchors = cells[0].find_all("a")
        if image_anchors:
            item["imageLink"] = image_anchors[0]["href"]
        items[name] = item

    dump_data(items, "furniture/" + key)
    return items