コード例 #1
0
def scrapeDIYTools(key):
    url = URLS.get(key)
    response = (requests.get(url, timeout=5))
    soup = BeautifulSoup(response.content, "html.parser")
    table = soup.find_all("table", {"class": "sortable"})
    items = {}
    for tr in table[0].find_all("tr")[1:]:
        name = tr.findChildren("td")[0].a.text
        item = {
            "name": name,
        }
        if tr.findChildren("a")[1]['href']:
            item["imageLink"] = tr.findChildren("a")[1]['href']
        if tr.findChildren("td")[2]:
            item["materials"] = separateByBr(
                tr.findChildren("td")[2]).strip("\n").split(",")
        if tr.findChildren("td")[2].find_all("img"):
            item["materialsImageLink"] = getImageLinks(
                tr.findChildren("td")[2].find_all("img"))
        if tr.findChildren("td")[3].img.get("data-src"):
            item["sizeImageLink"] = tr.findChildren("td")[3].img.get(
                "data-src")
        if tr.findChildren("td")[4].text:
            item["obtainedFrom"] = tr.findChildren("td")[4].text.strip(
                "\n").splitlines()
        if tr.findChildren("td")[5]:
            item["price"] = int(
                tr.findChildren("td")[5].next.strip().replace(",", ""))
        if tr.findChildren("td")[6]:
            item["isRecipeItem"] = avaiConverter(
                tr.findChildren("td")[6].next.strip("\n"))
        items[name] = item
    dumpData(items, key)
    return items
コード例 #2
0
def scrapeDIYTools(key):
    url = urls.get(key)
    response = (requests.get(url, timeout=5))
    soup = BeautifulSoup(response.content, "html.parser")
    table = soup.find_all("table", {"class": "sortable"})
    itemList = []
    for item in table[0].find_all("tr")[1:]:
        itemInfo = []
        for td in item.find_all("td"):
            if not td.string is None:
                itemInfo.append(td.next.strip())
            else:
                itemInfo.append(td.next)
        itemObject = {
            "name": item.findChildren("td")[0].a.text,
        }
        try:
            itemObject["imageLink"] = item.findChildren("a")[1]['href']
        except AttributeError:
            itemObject["imageLink"] = None
        try:
            itemObject["materials"] = separateByBr(
                item.findChildren("td")[2]).strip("\n").split(",")
        except AttributeError:
            itemObject["materials"] = []
        try:
            itemObject["materialsImageLink"] = getImageLinks(
                item.findChildren("td")[2].find_all("img"))
        except AttributeError:
            itemObject["materialsImageLink"] = []
        try:
            itemObject["sizeLink"] = itemInfo[3].img.get("data-src")
        except AttributeError:
            itemObject["sizeLink"] = None
        try:
            itemObject["obtainedFrom"] = itemInfo[4].text
        except AttributeError:
            itemObject["obtainedFrom"] = None
        try:
            itemObject["price"] = int(itemInfo[5].strip().replace(",", ""))
        except:
            itemObject["price"] = None
        try:
            itemObject["isRecipeItem"] = avaiConverter(itemInfo[6])
        except:
            itemObject["isRecipeItem"] = None
        itemList.append(itemObject)
    dumpData(itemList, key)
    return itemList
コード例 #3
0
def scrapeFish(key):  # same logic as scrapeBugs
    url = URLS.get(key)
    response = (requests.get(url, timeout=5))
    soup = BeautifulSoup(response.content, "html.parser")
    table = soup.find_all("table", {"class": "sortable"})
    items = {}
    for tr in table[0].find_all("tr")[1:]:
        tableData = []
        for td in tr.find_all("td"):
            tableData.append(td.next.strip())
        name = tr.findChildren("td")[0].a.text
        item = {
            "name": name,
            "imageLink": tr.findChildren("a")[1]['href'],
            "price": int(tableData[2]),
            "location": tr.findChildren("td")[3].text.strip('\n').strip(),
            "shadowSize": tableData[4],  # specific to fish
            "time": tr.findChildren("small")[0].text,
            "seasons-northern-hemisphere": {
                "jan": avaiConverter(tableData[6]),
                "feb": avaiConverter(tableData[7]),
                "mar": avaiConverter(tableData[8]),
                "apr": avaiConverter(tableData[9]),
                "may": avaiConverter(tableData[10]),
                "jun": avaiConverter(tableData[11]),
                "jul": avaiConverter(tableData[12]),
                "aug": avaiConverter(tableData[13]),
                "sep": avaiConverter(tableData[14]),
                "oct": avaiConverter(tableData[15]),
                "nov": avaiConverter(tableData[16]),
                "dec": avaiConverter(tableData[17])
            },
            "seasons-southern-hemisphere": {
                "jan": avaiConverter(tableData[12]),
                "feb": avaiConverter(tableData[13]),
                "mar": avaiConverter(tableData[14]),
                "apr": avaiConverter(tableData[15]),
                "may": avaiConverter(tableData[16]),
                "jun": avaiConverter(tableData[17]),
                "jul": avaiConverter(tableData[6]),
                "aug": avaiConverter(tableData[7]),
                "sep": avaiConverter(tableData[8]),
                "oct": avaiConverter(tableData[9]),
                "nov": avaiConverter(tableData[10]),
                "dec": avaiConverter(tableData[11])
            }
        }
        items[name] = item
    dumpData(items, key)
    return items
コード例 #4
0
def scrapeBugs(key):  # take url and return object containing bugs data
    url = URLS.get(key)
    # create soup object
    response = (requests.get(url, timeout=5))
    soup = BeautifulSoup(response.content, "html.parser")
    # find the target table
    table = soup.find_all("table", {"class": "sortable"})

    items = {}
    # go through each tr in the table, ignoring the table header
    for tr in table[0].find_all("tr")[1:]:
        tableData = []
        # get rid of empty space
        for td in tr.find_all("td"):
            tableData.append(td.next.strip())
        # find data and save it into an object
        name = tr.findChildren("td")[0].a.text
        item = {
            "name": name,
            "imageLink": tr.findChildren("a")[1]['href'],
            "price": int(tableData[2]),
            "location": tr.findChildren("td")[3].text.strip('\n').strip(),
            "time": tr.findChildren("small")[0].text,
            "seasons-northern-hemisphere": {
                "jan": avaiConverter(tableData[5]),
                "feb": avaiConverter(tableData[6]),
                "mar": avaiConverter(tableData[7]),
                "apr": avaiConverter(tableData[8]),
                "may": avaiConverter(tableData[9]),
                "jun": avaiConverter(tableData[10]),
                "jul": avaiConverter(tableData[11]),
                "aug": avaiConverter(tableData[12]),
                "sep": avaiConverter(tableData[13]),
                "oct": avaiConverter(tableData[14]),
                "nov": avaiConverter(tableData[15]),
                "dec": avaiConverter(tableData[16])
            },
            "seasons-southern-hemisphere":
            {  # shift northern hemisphere by 6 months
                "jan": avaiConverter(tableData[11]),
                "feb": avaiConverter(tableData[12]),
                "mar": avaiConverter(tableData[13]),
                "apr": avaiConverter(tableData[14]),
                "may": avaiConverter(tableData[15]),
                "jun": avaiConverter(tableData[16]),
                "jul": avaiConverter(tableData[5]),
                "aug": avaiConverter(tableData[6]),
                "sep": avaiConverter(tableData[7]),
                "oct": avaiConverter(tableData[8]),
                "nov": avaiConverter(tableData[9]),
                "dec": avaiConverter(tableData[10])
            }
        }
        items[name] = item
    dumpData(items, key)
    # return for debugging
    return items
コード例 #5
0
def scrapeFish(key):  # same logic as scrapeBugs
    url = urls.get(key)
    response = (requests.get(url, timeout=5))
    soup = BeautifulSoup(response.content, "html.parser")
    table = soup.find_all("table", {"class": "sortable"})
    itemList = []
    for item in table[0].find_all("tr")[1:]:
        itemInfo = []
        for td in item.find_all("td"):
            itemInfo.append(td.next.strip())
        itemObject = {
            "name": item.findChildren("a")[0].text,
            "imageLink": item.findChildren("a")[1]['href'],
            "price": int(itemInfo[2]),
            "location": item.findChildren("td")[3].text.strip('\n').strip(),
            "shadowSize": itemInfo[4],  # specific to fish
            "time": item.findChildren("small")[0].text,
            "jan": avaiConverter(itemInfo[6]),
            "feb": avaiConverter(itemInfo[7]),
            "mar": avaiConverter(itemInfo[8]),
            "apr": avaiConverter(itemInfo[9]),
            "may": avaiConverter(itemInfo[10]),
            "jun": avaiConverter(itemInfo[11]),
            "jul": avaiConverter(itemInfo[12]),
            "aug": avaiConverter(itemInfo[13]),
            "sep": avaiConverter(itemInfo[14]),
            "oct": avaiConverter(itemInfo[15]),
            "nov": avaiConverter(itemInfo[16]),
            "dec": avaiConverter(itemInfo[17])
        }
        itemList.append(itemObject)
    dumpData(itemList, key)

    return itemList
コード例 #6
0
def scrapeBugs(key):  # take url and return object containing bugs data
    url = urls.get(key)
    # create soup object
    response = (requests.get(url, timeout=5))
    soup = BeautifulSoup(response.content, "html.parser")
    # find the target table
    table = soup.find_all("table", {"class": "sortable"})
    # contains all items
    itemList = []
    # ignore first row as it just contains labels to the data
    for item in table[0].find_all("tr")[1:]:
        itemInfo = []
        # get rid of empty space
        for td in item.find_all("td"):
            itemInfo.append(td.next.strip())

        # find data and save it into an object
        itemObject = {
            "name": item.findChildren("td")[0].a.text,
            "imageLink": item.findChildren("a")[1]['href'],
            "price": int(itemInfo[2]),
            "location": item.findChildren("td")[3].text.strip('\n').strip(),
            "time": item.findChildren("small")[0].text,
            "jan": avaiConverter(itemInfo[5]),
            "feb": avaiConverter(itemInfo[6]),
            "mar": avaiConverter(itemInfo[7]),
            "apr": avaiConverter(itemInfo[8]),
            "may": avaiConverter(itemInfo[9]),
            "jun": avaiConverter(itemInfo[10]),
            "jul": avaiConverter(itemInfo[11]),
            "aug": avaiConverter(itemInfo[12]),
            "sep": avaiConverter(itemInfo[13]),
            "oct": avaiConverter(itemInfo[14]),
            "nov": avaiConverter(itemInfo[15]),
            "dec": avaiConverter(itemInfo[16])
        }
        itemList.append(itemObject)
    dumpData(itemList, key)
    return itemList