def lookup_miles_from_user(each_item, start_lat, start_lng):
    """
    Resolve a free item's coordinates and its distance from the user.

    Parameters
    ----------
    each_item : BeautifulSoup object
        Anchor tag pointing at one free item's post.
    start_lat, start_lng : string
        The user's latitude and longitude.

    Returns
    -------
    end_lat, end_lng : string
        Final latitude/longitude - where the free item is.
    miles : float
        Geodesic distance from the user in miles.

    Raises
    ------
    AttributeError
        For a post without any text (no Google Maps link found).
    """
    post_url = each_item.attrs["href"]
    post_resp = requestwrap.err_web(post_url)
    post_soup = BeautifulSoup(post_resp.text, "html.parser")
    maps_anchor = post_soup.find("a", href=mapsre)
    try:
        # The maps href embeds "...@<lat>,<lng>,<zoom>z..."; slice it out.
        coords = maps_anchor.attrs["href"].split("@")[1].split("z")[0]
        end_lat, end_lng, _ = coords.split(",")
        miles = geodesic((start_lat, start_lng), (end_lat, end_lng)).mi
        return end_lat, end_lng, miles
    except AttributeError:
        # maps_anchor is None when the post body carried no map link.
        print(f"{each_item.text} was likely deleted")
        raise
def lookup_city_from_cl_url(craiglisturl):
    """
    Extract the city and state from a Craigslist page's geo meta tags.

    Parameters
    ----------
    craiglisturl : string
        URL of a Craigslist page to scrape.

    Returns
    -------
    (metacity, metastate) on success: the lower-cased place name with
    whitespace removed, and the region code after the "-" in geo.region.
    None when either meta tag is missing from the page.
    """
    page = requestwrap.err_web(craiglisturl)
    soup = BeautifulSoup(page.text, "html.parser")
    try:
        place_tag = soup.find("meta", attrs={"name": "geo.placename"})
        metacity = place_tag.get("content").lower()
        # Collapse internal whitespace, e.g. "new york" -> "newyork".
        metacity = "".join(metacity.split())
        region_tag = soup.find("meta", attrs={"name": "geo.region"})
        # geo.region looks like "US-NY"; keep only the state part.
        _, metastate = region_tag.get("content").split("-")
    except AttributeError as e:
        # One of the find() calls returned None.
        print(e)
        return None
    return metacity, metastate
def lookup_cost_lyft(start_lat, start_lng, end_lat, end_lng):
    """
    Query Lyft's public cost API for a fare estimate between two points.

    Parameters
    ----------
    start_lat, start_lng, end_lat, end_lng : string
        Start latitude and longitude (user) and end latitude and
        longitude (item).

    Returns
    -------
    mind, maxd : float
        Minimum and maximum estimated Lyft cost, in dollars.
    """
    lyft_url = "http://www.lyft.com"
    lyft_path = (
        f"/api/costs?start_lat={start_lat}&start_lng={start_lng}"
        f"&end_lat={end_lat}&end_lng={end_lng}")
    lyft_resp = requestwrap.err_web(lyft_url + lyft_path)
    fares = json.loads(lyft_resp.content)
    estimate = fares["cost_estimates"][0]
    # Renamed from min/max: those shadowed the builtins of the same name.
    min_cents = estimate["estimated_cost_cents_min"]
    max_cents = estimate["estimated_cost_cents_max"]
    # API reports cents; convert to dollars.
    mind = min_cents / 100
    maxd = max_cents / 100
    return mind, maxd
def get_craigs_list_free_posts(craigs_list_url):
    """
    Connect to Craigslist by appending the free URL params.
    Get the Free posts and return them.

    Parameters
    ----------
    craigs_list_url : str
        Local Craigslist URL, e.g. "https://newyork.craigslist.org/brk".

    Returns
    -------
    craigs_free_posts : list of bs4.element.Tag
        Anchor tags for all free items found on the page.
        (Docstring previously listed ebay_prices/ebay_links, which this
        function never returned.)
    """
    if "newyork" in craigs_list_url:
        # New York URLs carry a borough suffix that must be preserved,
        # e.g. https://newyork.craigslist.org/brk -> .../search/brk/zip
        try:
            proto, _, url, suffix, *other = craigs_list_url.split("/")
        except ValueError as e:
            # Narrowed from `except Exception`: tuple unpacking of the
            # split can only raise ValueError (too few segments).
            print("New York URL unpacking error?", str(e))
            raise
        else:
            craigs_free_url = (
                f"{proto}//{url}/d/free-stuff/search/{suffix}/zip"  # https://
            )
    else:
        craigs_free_url = craigs_list_url + "/d/free-stuff/search/zip"
    logging.info(f"Scraping {craigs_free_url}")
    craigs_response = requestwrap.err_web(craigs_free_url)
    craigs_soup = BeautifulSoup(craigs_response.text, "html.parser")
    craigs_free_posts = craigs_soup.find_all("a",
                                             class_="result-title hdrlnk")
    return craigs_free_posts
def lookup_price_on_ebay(num, each_post, timeout=30):
    """
    Search Ebay for an item matching a Craigslist post's title.

    Parameters
    ----------
    num : int
        Index from enumerate(), used to correlate log lines.
    each_post : bs4.element.Tag
        Anchor tag for one free Craigslist item.
    timeout : int, optional
        Seconds to wait for the Ebay response (default 30).

    Returns
    -------
    (price, eb_link) : tuple of str
        Price of the first Ebay match and its listing URL (query string
        stripped).

    Raises
    ------
    ValueError
        For a post without price and link info, or any unhandled error.
        (Bug fix: these previously raised the literal text "{msg}"
        because the f-prefix was missing.)
    HTTPError
        When the underlying web request fails.
    """
    try:
        ebay_url = ("https://www.ebay.com/sch/i.html"
                    "?_from=R40&_trksid=m570.l1313&_nkw=")
        # NOTE(review): each_post.text is interpolated unescaped; titles
        # containing '&' or '#' may mangle the query string - confirm.
        ebay_path = (
            f"{each_post.text}&_sacat=0&LH_TitleDesc=0&_osacat=0"
            f"&_odkw={each_post.text}")
        ebay_query_url = ebay_url + ebay_path
        logging.info(f"{num} - Querying {ebay_query_url}")
        ebay_resp = requestwrap.err_web(ebay_query_url, timeout=timeout)
        ebay_soup = BeautifulSoup(ebay_resp.text, "html.parser")
        ebay_item_text = ebay_soup.find("h3", {
            "class": "s-item__title"
        }).get_text(separator=" ")
    except AttributeError:
        # find() returned None: no search result title on the page.
        msg = f"{num} - No match on Ebay"
        logging.warning(f"{msg} for {each_post.text}")
        raise ValueError(msg)
    except requests.exceptions.RequestException as e:
        logging.error(f"{num} - {e} - {each_post.text}")
        raise HTTPError
    except Exception as e:
        msg = "Unhandled"
        logging.error(f"{msg} - {num} - {e} - {each_post.text}")
        raise ValueError(msg)
    logging.info(
        f"{num} - Crawled Ebay OK - search returned: {ebay_item_text}")
    # Keep only items with price and links
    try:
        price = ebay_soup.find("span", {
            "class": "s-item__price"
        }).get_text()
    except AttributeError:
        msg = f"{num} - No price on Ebay"
        logging.warning(f"{msg} for {ebay_item_text}")
        raise ValueError(msg)
    try:
        eb_link = ebay_soup.find("a", {"class": "s-item__link"})
        eb_link = eb_link.attrs["href"]
        # Drop tracking parameters from the listing URL.
        eb_link = eb_link.partition("?")[0]
    except AttributeError:
        msg = "Price, but no link on Ebay?"  # fixed doubled "on on"
        logging.warning(f"{msg} for {ebay_item_text}")
        raise ValueError(msg)
    logging.info(f"{num} - Retrieved price of {price} at {eb_link}")
    return (price, eb_link)
# Given a city name, find the closest Craigslist Url citytext = f"{city},{state}" # print(citytext) craigs_list_url = craigzipsandurls.lookup_craigs_url(citytext).decode("UTF-8") print(f"{citytext} is available at {craigs_list_url}") sys.exit() start = "40.6490763" end = "-73.9762069" myzip = "32613" craigs_main_url = f"https://newyork.craigslist.org/search/brk/zip?postal={myzip}" craigs_main_resp = requestwrap.err_web(craigs_main_url) craigs_main_soup = BeautifulSoup(craigs_main_resp.text, "html.parser") craigs_main_posts = craigs_main_soup.find_all("a", class_="result-title hdrlnk") mapsre = re.compile("https://www.google.com/maps/preview/") lyft_url = "http://www.lyft.com" ebay_url = "https://www.ebay.com/sch/i.html?_from=R40&_trksid=m570.l1313&_nkw=" for each_item in craigs_main_posts: item_url = each_item.attrs["href"] craigs_resp = requestwrap.err_web(item_url) craigs_soup = BeautifulSoup(craigs_resp.text, "html.parser") googurl = craigs_soup.find("a", href=mapsre) try: