def scrape_furniture_housewares(key):
    """Scrape the A-Z housewares tables for the given URLS["furniture"] key."""
    url = URLS["furniture"][key]
    response = requests.get(url, timeout=5)
    soup = BeautifulSoup(
        response.content,
        "html5lib")  # html.parser does not scrape all html contents
    tables = soup("table", {"class": "roundy"})
    items = {}
    for table_number in range(3, 29):  # tables 3-28 hold the A-Z listings
        if len(tables[table_number]("tr")) > 3:  # skip letters with no items
            for tr in tables[table_number]("tr")[2:]:  # skip the two header rows
                name = tr("td")[1].text.strip()
                item = {
                    "image_url": parse_image_url(tr("td")[0]),
                    "price": {
                        "buy": parse_price(tr("td")[2].text),
                        "sell": parse_price(tr("td")[3].text)
                    },
                    "source": parse_source(tr("td")[4]),
                    "variations": parse_furniture_variations(tr("td")[5]),
                    "customization": parse_customization(tr("td")[6]),
                    "size_image_url": parse_image_img_url(tr("td")[7]),
                }
                items[name] = item
    dump_data(items, "furniture/" + key)
    return items
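These scrapers lean on module-level helpers (URLS, parse_image_url, parse_price, parse_source, dump_data, and the furniture-specific parsers) that are not part of this listing. Below is a minimal sketch of two of them, assuming price cells reduce to a single integer and that results are dumped as JSON under a data/ directory; the names match the calls above, but the bodies are guesses.

import json
import os
import re


def parse_price(text):
    # Hypothetical sketch: pull the digits out of a cell such as "1,200"
    # and return None for non-numeric cells ("NFS", blank, ...).
    digits = re.sub(r"\D", "", text)
    return int(digits) if digits else None


def dump_data(items, path):
    # Hypothetical sketch: write the scraped dict as JSON under a data/ root,
    # e.g. dump_data(items, "furniture/housewares") -> data/furniture/housewares.json
    filename = os.path.join("data", path + ".json")
    os.makedirs(os.path.dirname(filename), exist_ok=True)
    with open(filename, "w", encoding="utf-8") as f:
        json.dump(items, f, indent=2, ensure_ascii=False)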
Example #2
def scrape_music(key):
    """Scrape the song list table for the given URLS key."""
    url = URLS.get(key)
    response = requests.get(url, timeout=5)
    soup = BeautifulSoup(response.content, "html.parser")
    tables = soup("table", {"class": "article-table"})
    items = {}
    for tr in tables[0]("tr")[1:]:  # skip the header row
        name = tr("td")[0].text.strip()
        item_key = name.replace(" ", "_").replace("-", "_")
        item = {
            "name": name,
            "image_url": parse_image_url(tr.find_all("td")[1]),
            "priceBuy": parse_price(tr.find_all("td")[2].text),
            "priceSell": parse_price(tr.find_all("td")[3].text),
            "source": parse_source(tr.find_all("td")[4])
        }
        items[item_key] = item
    dump_data(items, "music/" + key)
    return items
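Each scraper also looks its target page up in a shared URLS mapping. Here is a sketch of the shape that mapping would need; the key names and placeholder URLs are illustrative assumptions, not the project's real values.

# Hypothetical sketch of the shared URLS lookup the scrapers index into: one
# flat key per music page, plus a nested "furniture" mapping keyed by category.
URLS = {
    "songs": "https://example.org/wiki/song-list",
    "furniture": {
        "housewares": "https://example.org/wiki/housewares",
        "wallpapers": "https://example.org/wiki/wallpapers",
    },
}

# With a mapping of this shape, each scraper is called with its key:
#   scrape_music("songs")
#   scrape_furniture_housewares("housewares")
#   scrape_furniture_wallpapers("wallpapers")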
Example #3
def scrape_furniture_wallpapers(key):
    """Scrape the wallpaper table for the given URLS["furniture"] key."""
    url = URLS["furniture"][key]
    response = requests.get(url, timeout=5)
    soup = BeautifulSoup(response.content, "html5lib")  # html.parser misses some of the page content
    tables = soup("table", {"class": "roundy"})
    items = {}
    for tr in tables[3]("tr")[2:]:  # the listing is the fourth "roundy" table; skip header rows
        name = tr("td")[1].text.strip()
        item = {
            "image_url": parse_image_url(tr("td")[0]),
            "price": {
                "buy": parse_price(tr("td")[2].text),
                "sell": parse_price(tr("td")[3].text)
            },
            "source": parse_source(tr("td")[4]),
        }
        items[name] = item
    dump_data(items, "furniture/" + key)
    return items
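The per-cell parsers are likewise assumed rather than shown. Below is a rough sketch of two of them, assuming the image cell wraps the file in a link (falling back to the <img> tag) and the source cell holds one entry per line.

from bs4 import BeautifulSoup


def parse_image_url(td):
    # Hypothetical sketch: prefer the <a href="..."> link around the image,
    # otherwise fall back to the <img src> attribute.
    link = td.find("a")
    if link and link.get("href"):
        return link["href"]
    img = td.find("img")
    return img.get("src") if img else None


def parse_source(td):
    # Hypothetical sketch: the source cell may list several entries, one per
    # line; return them as a list of cleaned strings.
    return [line.strip() for line in td.get_text("\n").splitlines() if line.strip()]


# Quick check against a hand-written cell:
cell = BeautifulSoup(
    '<td><a href="https://example.org/a.png"><img src="https://example.org/a_small.png"></a></td>',
    "html.parser").td
print(parse_image_url(cell))  # https://example.org/a.png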