def scrapeDIYTools(key):
    """Scrape the DIY tools wiki table for *key* into a dict keyed by item name.

    Downloads the page at ``URLS[key]``, parses the first table with class
    ``sortable``, and extracts one record per row: image link, crafting
    materials (text and image links), size image, sources, sell price, and
    whether the item is a recipe item. The result is persisted via
    ``dumpData(items, key)`` and also returned for debugging.
    """
    url = URLS.get(key)
    response = requests.get(url, timeout=5)
    soup = BeautifulSoup(response.content, "html.parser")
    table = soup.find_all("table", {"class": "sortable"})
    items = {}
    # Skip the header row; every remaining <tr> describes one DIY tool.
    for tr in table[0].find_all("tr")[1:]:
        # Hoist the row traversals once per row instead of re-walking the
        # subtree for every single field below.
        tds = tr.findChildren("td")
        links = tr.findChildren("a")
        name = tds[0].a.text
        item = {
            "name": name,
        }
        if links[1]['href']:
            item["imageLink"] = links[1]['href']
        if tds[2]:
            # Materials are separated by <br>; separateByBr flattens them.
            item["materials"] = separateByBr(tds[2]).strip("\n").split(",")
        if tds[2].find_all("img"):
            item["materialsImageLink"] = getImageLinks(tds[2].find_all("img"))
        # NOTE(review): lazily-loaded images keep the real URL in data-src.
        if tds[3].img.get("data-src"):
            item["sizeImageLink"] = tds[3].img.get("data-src")
        if tds[4].text:
            item["obtainedFrom"] = tds[4].text.strip("\n").splitlines()
        if tds[5]:
            # Prices are rendered with thousands separators, e.g. "1,200".
            item["price"] = int(tds[5].next.strip().replace(",", ""))
        if tds[6]:
            item["isRecipeItem"] = avaiConverter(tds[6].next.strip("\n"))
        items[name] = item
    dumpData(items, key)
    return items
def scrapeDIYTools(key):
    """Scrape the DIY tools wiki table for *key* into a list of item dicts.

    Downloads the page at ``urls[key]``, parses the first ``sortable`` table,
    and builds one dict per row (name, image link, materials, size, sources,
    price, recipe flag). Fields that are absent in a row fall back to
    ``None`` / ``[]``. The list is persisted via ``dumpData(itemList, key)``
    and returned for debugging.
    """
    url = urls.get(key)
    response = requests.get(url, timeout=5)
    soup = BeautifulSoup(response.content, "html.parser")
    table = soup.find_all("table", {"class": "sortable"})
    itemList = []
    # Skip the header row; every remaining <tr> describes one DIY tool.
    for item in table[0].find_all("tr")[1:]:
        itemInfo = []
        for td in item.find_all("td"):
            # `.string` is None for cells containing nested markup;
            # keep those raw so the per-field parsing below can inspect them.
            if td.string is not None:
                itemInfo.append(td.next.strip())
            else:
                itemInfo.append(td.next)
        # Hoist the row traversals once per row instead of re-walking the
        # subtree for every field.
        tds = item.findChildren("td")
        links = item.findChildren("a")
        itemObject = {
            "name": tds[0].a.text,
        }
        try:
            itemObject["imageLink"] = links[1]['href']
        except AttributeError:
            itemObject["imageLink"] = None
        try:
            # Materials are separated by <br>; separateByBr flattens them.
            itemObject["materials"] = separateByBr(
                tds[2]).strip("\n").split(",")
        except AttributeError:
            itemObject["materials"] = []
        try:
            itemObject["materialsImageLink"] = getImageLinks(
                tds[2].find_all("img"))
        except AttributeError:
            itemObject["materialsImageLink"] = []
        try:
            # Lazily-loaded images keep the real URL in data-src.
            itemObject["sizeLink"] = itemInfo[3].img.get("data-src")
        except AttributeError:
            itemObject["sizeLink"] = None
        try:
            itemObject["obtainedFrom"] = itemInfo[4].text
        except AttributeError:
            itemObject["obtainedFrom"] = None
        # Narrowed from a bare `except:` — only the failures a malformed or
        # missing cell can actually raise here.
        try:
            # Prices are rendered with thousands separators, e.g. "1,200".
            itemObject["price"] = int(itemInfo[5].strip().replace(",", ""))
        except (AttributeError, IndexError, TypeError, ValueError):
            itemObject["price"] = None
        try:
            itemObject["isRecipeItem"] = avaiConverter(itemInfo[6])
        except (AttributeError, IndexError, TypeError, ValueError):
            itemObject["isRecipeItem"] = None
        itemList.append(itemObject)
    dumpData(itemList, key)
    return itemList
def scrapeFish(key):  # same logic as scrapeBugs
    """Scrape the fish wiki table for *key* into a dict keyed by fish name.

    Each record carries name, image link, sell price, location, shadow size
    (fish-specific), active time, and per-month availability for both
    hemispheres. The southern hemisphere is the northern data shifted by six
    months. Persists via ``dumpData(items, key)`` and returns the dict.
    """
    months = ("jan", "feb", "mar", "apr", "may", "jun",
              "jul", "aug", "sep", "oct", "nov", "dec")
    url = URLS.get(key)
    response = requests.get(url, timeout=5)
    soup = BeautifulSoup(response.content, "html.parser")
    table = soup.find_all("table", {"class": "sortable"})
    items = {}
    # Skip the header row; every remaining <tr> describes one fish.
    for tr in table[0].find_all("tr")[1:]:
        tableData = [td.next.strip() for td in tr.find_all("td")]
        tds = tr.findChildren("td")
        name = tds[0].a.text
        # Columns 6-17 hold Jan..Dec availability (northern hemisphere).
        northern = [avaiConverter(tableData[i]) for i in range(6, 18)]
        # Southern hemisphere = northern rotated by six months.
        southern = northern[6:] + northern[:6]
        item = {
            "name": name,
            "imageLink": tr.findChildren("a")[1]['href'],
            "price": int(tableData[2]),
            "location": tds[3].text.strip('\n').strip(),
            "shadowSize": tableData[4],  # specific to fish
            "time": tr.findChildren("small")[0].text,
            "seasons-northern-hemisphere": dict(zip(months, northern)),
            "seasons-southern-hemisphere": dict(zip(months, southern)),
        }
        items[name] = item
    dumpData(items, key)
    return items
def scrapeBugs(key):  # take url and return object containing bugs data
    """Scrape the bugs wiki table for *key* into a dict keyed by bug name.

    Each record carries name, image link, sell price, location, active time,
    and per-month availability for both hemispheres. The southern hemisphere
    is the northern data shifted by six months. Persists via
    ``dumpData(items, key)`` and returns the dict for debugging.
    """
    months = ("jan", "feb", "mar", "apr", "may", "jun",
              "jul", "aug", "sep", "oct", "nov", "dec")
    url = URLS.get(key)
    # create soup object
    response = requests.get(url, timeout=5)
    soup = BeautifulSoup(response.content, "html.parser")
    # find the target table
    table = soup.find_all("table", {"class": "sortable"})
    items = {}
    # go through each tr in the table, ignoring the table header
    for tr in table[0].find_all("tr")[1:]:
        # get rid of empty space
        tableData = [td.next.strip() for td in tr.find_all("td")]
        tds = tr.findChildren("td")
        name = tds[0].a.text
        # Columns 5-16 hold Jan..Dec availability (northern hemisphere).
        northern = [avaiConverter(tableData[i]) for i in range(5, 17)]
        # shift northern hemisphere by 6 months
        southern = northern[6:] + northern[:6]
        item = {
            "name": name,
            "imageLink": tr.findChildren("a")[1]['href'],
            "price": int(tableData[2]),
            "location": tds[3].text.strip('\n').strip(),
            "time": tr.findChildren("small")[0].text,
            "seasons-northern-hemisphere": dict(zip(months, northern)),
            "seasons-southern-hemisphere": dict(zip(months, southern)),
        }
        items[name] = item
    dumpData(items, key)
    # return for debugging
    return items
def scrapeFish(key):  # same logic as scrapeBugs
    """Scrape the fish wiki table for *key* and return a list of fish dicts.

    Each dict holds name, image link, sell price, location, shadow size
    (fish-specific), active time, and one availability flag per month
    (columns 6-17 of the row). Persists via ``dumpData`` and returns the list.
    """
    response = requests.get(urls.get(key), timeout=5)
    soup = BeautifulSoup(response.content, "html.parser")
    data_table = soup.find_all("table", {"class": "sortable"})[0]
    results = []
    # First row is the column-label header — skip it.
    for row in data_table.find_all("tr")[1:]:
        # Trimmed text of every cell, in column order.
        cells = [cell.next.strip() for cell in row.find_all("td")]
        anchors = row.findChildren("a")
        record = {
            "name": anchors[0].text,
            "imageLink": anchors[1]['href'],
            "price": int(cells[2]),
            "location": row.findChildren("td")[3].text.strip('\n').strip(),
            "shadowSize": cells[4],  # specific to fish
            "time": row.findChildren("small")[0].text,
        }
        month_names = ("jan", "feb", "mar", "apr", "may", "jun",
                       "jul", "aug", "sep", "oct", "nov", "dec")
        # Availability flags start at column 6 (Jan) and run through Dec.
        for column, month in enumerate(month_names, start=6):
            record[month] = avaiConverter(cells[column])
        results.append(record)
    dumpData(results, key)
    return results
def scrapeBugs(key):  # take url and return object containing bugs data
    """Scrape the bugs wiki table for *key* and return a list of bug dicts.

    Each dict holds name, image link, sell price, location, active time, and
    one availability flag per month (columns 5-16 of the row). Persists via
    ``dumpData`` and returns the list for debugging.
    """
    page = requests.get(urls.get(key), timeout=5)
    soup = BeautifulSoup(page.content, "html.parser")
    # Only the first sortable table on the page holds the bug data.
    data_table = soup.find_all("table", {"class": "sortable"})[0]
    collected = []
    # First row is the column-label header — skip it.
    for row in data_table.find_all("tr")[1:]:
        # Trimmed text of every cell, in column order.
        cell_text = [td.next.strip() for td in row.find_all("td")]
        cells = row.findChildren("td")
        entry = {
            "name": cells[0].a.text,
            "imageLink": row.findChildren("a")[1]['href'],
            "price": int(cell_text[2]),
            "location": cells[3].text.strip('\n').strip(),
            "time": row.findChildren("small")[0].text,
        }
        month_keys = ("jan", "feb", "mar", "apr", "may", "jun",
                      "jul", "aug", "sep", "oct", "nov", "dec")
        # Availability flags occupy columns 5 (Jan) through 16 (Dec).
        entry.update(
            zip(month_keys, (avaiConverter(v) for v in cell_text[5:17])))
        collected.append(entry)
    dumpData(collected, key)
    return collected