Example #1
0
def scrape_shoes(key):
    """Scrape the page registered under ``key`` in ``URLS`` into an item dict.

    Rows are read from the ``table.roundy`` elements at indices 2 through 7,
    skipping the first two rows of each table (presumably header rows). The
    result is handed to ``dump_data`` under ``"clothing/" + key`` and returned.

    Args:
        key: Lookup key into ``URLS``; also the suffix of the dump name.

    Returns:
        dict mapping each item's name to its scraped attributes. The
        ``"imageLink"`` entry is only present when the second cell of the row
        contains at least one ``<a>`` element.
    """
    page = requests.get(URLS.get(key), timeout=5)
    document = BeautifulSoup(page.content, "html.parser")
    roundy_tables = document.find_all("table", {"class": "roundy"})

    items = {}
    for index in range(2, 8):
        data_rows = roundy_tables[index].find_all("tr")[2:]
        for row in data_rows:
            # Collect the row's cells once instead of re-querying per field.
            cells = row.find_all("td")
            name = cells[0].text.strip()
            entry = {
                "name": name,
                "priceBuy": parse_price(cells[2].text),
                "priceSell": parse_price(cells[3].text),
                "source": parse_source(cells[4]),
                "variations": parse_variations(cells[5]),
                "variationImageLinks": get_image_links(
                    cells[5].find_all("img")),
            }
            # Some rows carry no link in the image cell; add the key only
            # when an anchor is actually there.
            anchors = cells[1].find_all("a")
            if anchors:
                entry["imageLink"] = anchors[0]["href"]
            items[name] = entry

    dump_data(items, "clothing/" + key)
    return items
def scrape_furniture_housewares(key):
    """Scrape the furniture page registered at ``URLS["furniture"][key]``.

    Walks the ``table.roundy`` elements at indices 3 through 28 (one per
    letter a-z, per the original author's note), ignores tables with three or
    fewer rows, and skips the first two rows of every remaining table. The
    collected items are passed to ``dump_data`` under ``"furniture/" + key``
    and returned.

    Args:
        key: Entry name inside ``URLS["furniture"]``; also the dump suffix.

    Returns:
        dict mapping each item's name (text of the second cell) to its
        scraped attributes.
    """
    page = requests.get(URLS["furniture"][key], timeout=5)
    # html.parser does not scrape all html contents, hence html5lib.
    document = BeautifulSoup(page.content, "html5lib")
    roundy_tables = document("table", {"class": "roundy"})

    items = {}
    for index in range(3, 29):  # a - z
        rows = roundy_tables[index]("tr")
        if len(rows) <= 3:  # some tables are empty
            continue
        for row in rows[2:]:
            cells = row("td")
            name = cells[1].text.strip()
            items[name] = {
                "image_url": parse_image_url(cells[0]),
                "price": {
                    "buy": parse_price(cells[2].text),
                    "sell": parse_price(cells[3].text),
                },
                "source": parse_source(cells[4]),
                "variations": parse_furniture_variations(cells[5]),
                "customization": parse_customization(cells[6]),
                "size_image_url": parse_image_img_url(cells[7]),
            }

    dump_data(items, "furniture/" + key)
    return items
Example #3
0
def scrape_umbrellas(key):
    """Scrape the page registered under ``key`` in ``URLS`` into an item dict.

    Rows are read from the ``table.roundy`` element at index 2, skipping its
    first two rows (presumably header rows). The result is handed to
    ``dump_data`` under ``"clothing/" + key`` and returned.

    Args:
        key: Lookup key into ``URLS``; also the suffix of the dump name.

    Returns:
        dict mapping each item's name to its scraped attributes. The
        ``"imageLink"`` entry is only present when the image cell contains at
        least one ``<a>`` element.
    """
    url = URLS.get(key)
    response = requests.get(url, timeout=5)
    soup = BeautifulSoup(response.content, "html.parser")
    tables = soup.find_all("table", {"class": "roundy"})
    items = {}
    for tr in tables[2].find_all("tr")[2:]:
        # Collect the row's cells once instead of re-querying per field.
        cells = tr.find_all("td")
        name = cells[0].text.strip()
        item = {"name": name}
        # A row whose image cell has no link used to abort the whole scrape
        # with IndexError; the key is now omitted instead, matching how
        # scrape_shoes treats the same situation.
        anchors = cells[1].find_all("a")
        if anchors:
            item["imageLink"] = anchors[0]["href"]
        item["source"] = parse_source(cells[2])
        item["priceBuy"] = parse_price(cells[3].text)
        item["priceSell"] = parse_price(cells[4].text)
        items[name] = item
    dump_data(items, "clothing/" + key)
    return items
Example #4
0
def scrape_music(key):
    """Scrape the page registered under ``key`` in ``URLS`` into an item dict.

    Rows are read from the first ``table.article-table`` element, skipping its
    first row (presumably the header). The result is handed to ``dump_data``
    under ``"music/" + key`` and returned.

    Args:
        key: Lookup key into ``URLS``; also the suffix of the dump name.

    Returns:
        dict keyed by the item's name with spaces and hyphens replaced by
        underscores; each value holds the scraped attributes, including the
        unmodified ``"name"``.
    """
    page = requests.get(URLS.get(key), timeout=5)
    document = BeautifulSoup(page.content, "html.parser")
    article_tables = document("table", {"class": "article-table"})

    # One translation pass maps both " " and "-" to "_".
    to_underscore = str.maketrans(" -", "__")

    items = {}
    for row in article_tables[0]("tr")[1:]:
        cells = row("td")
        name = cells[0].text.strip()
        item_key = name.translate(to_underscore)
        items[item_key] = {
            "name": name,
            "image_url": parse_image_url(cells[1]),
            "priceBuy": parse_price(cells[2].text),
            "priceSell": parse_price(cells[3].text),
            "source": parse_source(cells[4]),
        }

    dump_data(items, "music/" + key)
    return items
Example #5
0
def scrape_furniture_wallpapers(key):
    """Scrape the furniture page registered at ``URLS["furniture"][key]``.

    Rows are read from the ``table.roundy`` element at index 3, skipping its
    first two rows (presumably header rows). The result is handed to
    ``dump_data`` under ``"furniture/" + key`` and returned.

    Args:
        key: Entry name inside ``URLS["furniture"]``; also the dump suffix.

    Returns:
        dict mapping each item's name (text of the second cell) to its image
        URL, buy/sell price and source.
    """
    page = requests.get(URLS["furniture"][key], timeout=5)
    # html.parser does not scrape all html contents, hence html5lib.
    document = BeautifulSoup(page.content, "html5lib")
    roundy_tables = document("table", {"class": "roundy"})

    items = {}
    for row in roundy_tables[3]("tr")[2:]:
        cells = row("td")
        name = cells[1].text.strip()
        items[name] = {
            "image_url": parse_image_url(cells[0]),
            "price": {
                "buy": parse_price(cells[2].text),
                "sell": parse_price(cells[3].text),
            },
            "source": parse_source(cells[4]),
        }

    dump_data(items, "furniture/" + key)
    return items
Example #6
0
def scrape_furniture_housewares(key):
    """Scrape the page registered under ``key`` in ``URLS`` into an item dict.

    Rows are read from the ``table.roundy`` element at index 3, skipping its
    first two rows (presumably header rows). The result is handed to
    ``dump_data`` under ``"furniture/" + key`` and returned.

    Args:
        key: Lookup key into ``URLS``; also the suffix of the dump name.

    Returns:
        dict mapping each item's name (text of the second cell) to its
        scraped attributes. ``"customization"`` is always ``False``; the
        ``"imageLink"`` entry is only present when the first cell contains at
        least one ``<a>`` element.
    """
    url = URLS.get(key)
    response = requests.get(url, timeout=5)
    soup = BeautifulSoup(response.content, "html.parser")
    tables = soup.find_all("table", {"class": "roundy"})
    items = {}
    for tr in tables[3]("tr")[2:]:
        # Collect the row's cells once instead of re-querying per field.
        cells = tr.find_all("td")
        name = cells[1].text.strip()
        item = {
            "name": name,
            "priceBuy": parse_price(cells[2].text),
            "priceSell": parse_price(cells[3].text),
            "source": parse_source(cells[4]),
            "variations": parse_variations(cells[5]),
            "customization": False,
            # NOTE(review): raises AttributeError if the cell has no <img>.
            "sizeLink": cells[6].img.get("data-src"),
        }
        # The guard must test the same cell the href is read from (cell 0).
        # Testing cell 1 could raise IndexError or drop an existing link.
        image_anchors = cells[0].find_all("a")
        if image_anchors:
            item["imageLink"] = image_anchors[0]["href"]
        items[name] = item
    dump_data(items, "furniture/" + key)
    return items