def parse_html(xml_data):
    feed = feedparser.parse(xml_data)

    try:
        last_updated = feed["entries"][0]["updated"]
    except KeyError:
        last_updated = utc_now()

    data = {
        "lots": [],
        # remove trailing timezone for consistency
        "last_updated": last_updated.replace("Z", "")
    }

    for entry in feed["entries"]:
        summary = parse_summary(entry["summary"])
        title_elements = parse_title(entry["title"])

        lot_identifier = (title_elements[2] + " " + title_elements[0]).strip()
        lot = geodata.lot(lot_identifier)

        data["lots"].append({
            "name": title_elements[0],
            "address": title_elements[1],
            "id": lot.id,
            "state": summary[0],
            "free": summary[1],
            "total": lot.total,
            "coords": lot.coords,
            "forecast": False,
            "lot_type": title_elements[2]
        })

    return data
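# All of these scrapers resolve static lot metadata through geodata.lot(name).
# The helper itself is not shown here; below is a minimal hypothetical sketch
# of the interface the parsers assume (field names inferred from attribute
# access in the functions, not taken from the real GeoData implementation).
from collections import namedtuple

Lot = namedtuple("Lot", ["id", "name", "address", "coords", "total", "type"])

class GeoData:
    """Hypothetical stand-in: looks up static lot metadata by name."""

    def __init__(self, lots_by_name):
        self._lots = lots_by_name

    def lot(self, name):
        # Fall back to an empty record so an unknown lot does not crash a scraper.
        return self._lots.get(name, Lot(None, name, None, None, 0, None))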
def parse_html(html):
    soup = BeautifulSoup(html, "html.parser")
    lots = []

    for row in soup.find_all("div", class_='vc_row wpb_row section vc_row-fluid parking-lots grid_section'):
        for column in row.find_all("div", class_='vc_col-sm-3 wpb_column vc_column_container '):
            h3 = column.find_all("h3")
            if h3[0].a is not None:
                name = h3[0].a.string
                lot = geodata.lot(name)
                lots.append({
                    "name": name,
                    "coords": lot.coords,
                    "free": int(h3[1].span.strong.get_text()),
                    "address": lot.address,
                    "total": lot.total,
                    "state": "nodata",
                    "id": lot.id,
                    "forecast": False
                })

    return {
        "last_updated": utc_now(),
        "lots": lots
    }
def parse_html(xml_data):
    feed = feedparser.parse(xml_data)

    try:
        last_updated = feed["entries"][0]["updated"]
        # strip the weekday prefix and timezone suffix from the RFC 822 date
        last_updated = datetime.strptime(last_updated[5:25],
                                         "%d %b %Y %H:%M:%S").isoformat()
    except KeyError:
        last_updated = utc_now()

    data = {
        "lots": [],
        "last_updated": last_updated
    }

    for entry in feed["entries"]:
        summary = parse_summary(entry["summary"])
        title_elements = parse_title(entry["title"])

        lot_identifier = html.unescape(
            (title_elements[2] + " " + title_elements[0]).strip())
        lot = geodata.lot(lot_identifier)

        data["lots"].append({
            "name": html.unescape(title_elements[0]),
            "address": lot.address,
            "id": html.unescape(lot.id),
            "state": "open",
            "free": summary[1],
            "total": lot.total,
            "coords": lot.coords,
            "forecast": False,
            "lot_type": title_elements[2]
        })

    return data
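# The magic slice last_updated[5:25] relies on the fixed width of the RFC 822
# dates feedparser returns. A quick worked example (the sample date is
# illustrative):
updated = "Mon, 06 Mar 2017 10:00:00 GMT"
# Dropping the "Mon, " prefix and the " GMT" suffix leaves exactly the
# 20 characters that "%d %b %Y %H:%M:%S" expects.
assert updated[5:25] == "06 Mar 2017 10:00:00"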
def parse_html(html):
    soup = BeautifulSoup(html, "html.parser")
    lots = []

    for row in soup.find_all("div", class_='parking-lots'):
        entity_wrapper_class = 'wpb_column vc_column_container vc_col-sm-3'
        for column in row.find_all("div", class_=entity_wrapper_class):
            h3 = column.find_all("h3")
            if h3[0].a is not None and len(h3) > 1:
                name = h3[0].a.string
                # the free count sits in the first <strong> nested in a <span>
                free = 0
                for heading in h3:
                    for heading_element in heading.find_all("span"):
                        if heading_element.find("strong") is not None:
                            free = int(heading_element.strong.get_text())
                lot = geodata.lot(name)
                ltype = None
                for p in [pt for pt in ["Parkhaus", "Parkplatz"] if pt in name]:
                    ltype = p
                lots.append({
                    "name": name,
                    "coords": lot.coords,
                    "free": free,
                    "address": lot.address,
                    "total": lot.total,
                    "state": "unknown",
                    "id": lot.id,
                    "lot_type": ltype,
                    "forecast": False
                })
            elif h3[0].string:
                # no counter available for this lot, report it without data
                name = h3[0].string
                ltype = None
                if "Parkhaus" in name:
                    ltype = "Parkhaus"
                elif "Parkplatz" in name:
                    ltype = "Parkplatz"
                lot = geodata.lot(name)
                lots.append({
                    "name": name,
                    "coords": lot.coords,
                    "free": 0,
                    "address": lot.address,
                    "total": lot.total,
                    "state": "nodata",
                    "id": lot.id,
                    "lot_type": ltype,
                    "forecast": False
                })

    return {"last_updated": utc_now(), "lots": lots}
def parse_html(html):
    # BeautifulSoup is a great and easy way to parse the html
    # and find the bits and pieces we're looking for.
    soup = BeautifulSoup(html, "html.parser")

    data = {
        "last_updated": '',
        "lots": []
    }

    try:
        # <div class="container-fluid">
        parking_data = soup.find('div', class_='container-fluid')
        # e.g. "Letzte Aktualisierung: 04.07.2019 11:03:00"
        last_updated = convert_date(
            parking_data.find('h5').text,
            'Letzte Aktualisierung: %d.%m.%Y %H:%M:%S')
        data["last_updated"] = last_updated
    except:
        # if the service is unavailable (did happen in one of my tests)
        # there is nothing to scrape, so bail out early
        data["last_updated"] = utc_now()
        return data

    parking_lots = parking_data.find_all('div', class_='well')
    for one_parking_lot in parking_lots:
        parking_name = one_parking_lot.find('b').text.strip()
        lot = geodata.lot(parking_name)

        parking_free = 0
        try:
            parking_status = 'open'
            parking_free = int(
                one_parking_lot.find_all(
                    'div', role='progressbar')[1].find('b').text.strip())
        except:
            parking_status = 'nodata'

        data["lots"].append({
            "name": parking_name,
            "free": parking_free,
            "total": lot.total,
            "address": lot.address,
            "coords": lot.coords,
            "state": parking_status,
            "lot_type": lot.type,
            "id": lot.id,
            "forecast": False
        })

    return data
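# convert_date comes from the project's util module and is not shown here.
# A minimal sketch of what the call above assumes: parse a localized timestamp
# with the given strptime format and return it as an ISO 8601 string (any
# timezone normalisation the real utility performs is an assumption).
from datetime import datetime

def convert_date(date_string, date_format):
    return datetime.strptime(date_string.strip(), date_format).isoformat()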
def parse_html(html):
    soup = BeautifulSoup(html, "html.parser")
    lots = []

    for row in soup.find_all("div", class_='parking-lots'):
        for column in row.find_all(
                "div", class_='wpb_column vc_column_container vc_col-sm-3'):
            h3 = column.find_all("h3")
            if h3[0].a is not None:
                name = h3[0].a.string
                lot = geodata.lot(name)
                ltype = None
                for p in [pt for pt in ["Parkhaus", "Parkplatz"] if pt in name]:
                    ltype = p
                lots.append({
                    "name": name,
                    "coords": lot.coords,
                    "free": int(h3[1].span.strong.get_text()),
                    "address": lot.address,
                    "total": lot.total,
                    "state": "unknown",
                    "id": lot.id,
                    "lot_type": ltype,
                    "forecast": False
                })
            else:
                name = h3[0].string
                ltype = None
                if "Parkhaus" in name:
                    ltype = "Parkhaus"
                elif "Parkplatz" in name:
                    ltype = "Parkplatz"
                lot = geodata.lot(name)
                lots.append({
                    "name": name,
                    "coords": lot.coords,
                    "free": 0,
                    "address": lot.address,
                    "total": lot.total,
                    "state": "nodata",
                    "id": lot.id,
                    "lot_type": ltype,
                    "forecast": False
                })

    return {"last_updated": utc_now(), "lots": lots}
def save_data_to_db(cursor, parking_data, city):
    """Save the given data into the Postgres DB."""
    timestamp_updated = parking_data["last_updated"]
    timestamp_downloaded = util.utc_now()
    json_data = json.dumps(parking_data)
    # RETURNING id (unquoted) returns the new row's id;
    # the quoted form 'id' would just return the literal string.
    sql_string = "INSERT INTO parkapi(timestamp_updated, timestamp_downloaded, city, data) " \
                 "VALUES (%(updated)s, %(downloaded)s, %(city)s, %(data)s) RETURNING id;"
    cursor.execute(sql_string, {
        "updated": timestamp_updated,
        "downloaded": timestamp_downloaded,
        "city": city,
        "data": json_data
    })
    print("Saved " + city + " to DB.")
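# The INSERT above implies a table roughly like this. Hypothetical DDL sketch;
# the column types are assumptions inferred from the values bound above.
CREATE_TABLE = """
CREATE TABLE IF NOT EXISTS parkapi (
    id                   SERIAL PRIMARY KEY,
    timestamp_updated    TIMESTAMP,  -- parking_data["last_updated"]
    timestamp_downloaded TIMESTAMP,  -- when the page was fetched
    city                 TEXT,
    data                 JSONB       -- the full scraped payload
);
"""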
def parse_html(xml):
    soup = BeautifulSoup(xml, "html.parser")

    # last_updated is the date when the data on the page was last updated
    try:
        last_updated = soup.select("zeitstempel")[0].text
        # e.g. "06.03.2017 10:00 Uhr" -> ISO 8601
        last_updated = datetime.strptime(last_updated[0:16],
                                         "%d.%m.%Y %H:%M").isoformat()
    except IndexError:
        # select() returns an empty list when the tag is missing
        last_updated = utc_now()

    data = {
        "last_updated": last_updated,
        "lots": []
    }

    for ph in soup.find_all("parkhaus"):
        lot_name = ph.find("name").text
        lot_actual = int(ph.find("aktuell").text)
        lot_total = int(ph.find("gesamt").text)
        lot_free = lot_total - lot_actual

        # the state may only be "open", "closed" or "nodata"; should the page
        # list other states, map them onto these three possibilities
        state_german = ph.find("status").text
        if state_german == "Offen":
            state = "open"
        elif state_german == "Geschlossen":
            state = "closed"
        else:
            state = "nodata"

        lot = geodata.lot(lot_name)
        data["lots"].append({
            "name": lot.name,
            "free": lot_free,
            "total": lot_total,
            "address": lot.address,
            "coords": lot.coords,
            "state": state,
            "lot_type": lot.type,
            "id": lot.id,
            "forecast": False,
        })

    return data
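# The tag names used above (zeitstempel, parkhaus, name, aktuell, gesamt,
# status) imply a feed shaped roughly like this. Reconstructed sample for
# illustration, not the real feed:
sample_xml = """
<zeitstempel>06.03.2017 10:00 Uhr</zeitstempel>
<parkhaus>
  <name>Parkhaus Hauptbahnhof</name>
  <aktuell>120</aktuell>
  <gesamt>300</gesamt>
  <status>Offen</status>
</parkhaus>
"""
# parse_html(sample_xml) would report free = 300 - 120 = 180 and state "open".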
def parse_html(html):
    # BeautifulSoup is a great and easy way to parse the html
    # and find the bits and pieces we're looking for.
    soup = BeautifulSoup(html, "html.parser")

    data = {
        "last_updated": utc_now(),  # not found on site, so we use something else
        "lots": []
    }

    # for handling duplicate entries
    seen_names = dict()

    # find all entries: <div class="houses">
    parking_houses = soup.find_all('div', class_='houses')
    for parking_group in parking_houses:
        parking_lots = parking_group.find_all('li')
        for one_lot in parking_lots:
            parking_name = one_lot.find('a').text
            if parking_name not in seen_names:
                # add this to the list
                seen_names[parking_name] = 1
                lot = geodata.lot(parking_name)

                parking_state = 'open'
                parking_free = 0
                try:
                    parking_free = int(
                        one_lot.find('span', class_='free-text').text.split()[0])
                except:
                    parking_state = 'nodata'

                data["lots"].append({
                    "name": parking_name,
                    "free": parking_free,
                    "total": lot.total,
                    "address": lot.address,
                    "coords": lot.coords,
                    "state": parking_state,
                    "lot_type": lot.type,
                    "id": lot.id,
                    "forecast": False
                })

    return data
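# The selectors above imply markup roughly like this. Reconstructed sample for
# illustration, not the real page:
sample_html = """
<div class="houses">
  <ul>
    <li><a href="#">Parkhaus Mitte</a>
        <span class="free-text">42 frei</span></li>
  </ul>
</div>
"""
# With a geodata lookup in place, parse_html(sample_html) would report one
# open lot named "Parkhaus Mitte" with free = 42.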
def parse_html(html):
    soup = BeautifulSoup(html, "html.parser")

    # last update time (UTC): Konstanz does not support last_updated yet.
    # I hope they will inform me when it's added; as the data seems accurate
    # I will return the current time and date.
    data = {"last_updated": utc_now(), "lots": []}

    # get all tables with lots
    parken = soup.find_all("table", class_="parken")
    for park_lot in parken:
        td = park_lot.find_all("td")
        parking_name = td[0].text.strip()
        # skip the header row
        if parking_name == "Parkmöglichkeit":
            continue

        # work-around for the umlaut problem: ugly but working
        if 'Marktst' in parking_name:
            parking_name = 'Marktstätte'
        elif 'bele' in parking_name:
            parking_name = 'Döbele'

        # get the data
        lot = geodata.lot(parking_name)

        # look for free lots
        parking_state = 'open'
        parking_free = 0
        try:
            parking_free = int(td[1].text)
        except:
            parking_state = 'nodata'

        data["lots"].append({
            "name": parking_name,
            "free": parking_free,
            "total": lot.total,
            "address": lot.address,
            "coords": lot.coords,
            "state": parking_state,
            "lot_type": lot.type,
            "id": lot.id,
            "forecast": False
        })

    return data
def parse_html(html):
    # BeautifulSoup is a great and easy way to parse the html
    # and find the bits and pieces we're looking for.
    soup = BeautifulSoup(html, "html.parser")

    data = {
        # no usable timestamp on the page, so use the current time
        # (same approach as Konstanz)
        "last_updated": utc_now(),
        "lots": []
    }

    table = soup.find('table', id='haupttabelle')
    table2 = table.find('table', width='790')
    rows = table2.find_all('tr')

    # only rows 3..11 of the table contain lot data
    for row in rows[3:12]:
        parking_data = row.find_all('td')
        parking_name = parking_data[0].text
        lot = geodata.lot(parking_name)

        try:
            parking_state = 'open'
            parking_free = int(parking_data[2].text)
        except:
            parking_free = 0
            parking_state = 'nodata'

        data["lots"].append({
            "name": parking_name,
            "free": parking_free,
            "total": lot.total,
            "address": lot.address,
            "coords": lot.coords,
            "state": parking_state,
            "lot_type": lot.type,
            "id": lot.id,
            "forecast": False,
        })

    return data
def parse_html(html):
    # BeautifulSoup is a great and easy way to parse the html
    # and find the bits and pieces we're looking for.
    soup = BeautifulSoup(html, "html.parser")

    # last update time (UTC): Karlsruhe does not support last_updated yet.
    # As the data seems accurate I will return the current time and date.
    data = {"last_updated": utc_now(), "lots": []}

    lots = soup.find_all('div', class_='parkhaus')
    for parking_lot in lots:
        parking_name = parking_lot.find('a').text
        lot = geodata.lot(parking_name)

        parking_state = 'open'
        parking_free = 0
        parking_fuellstand = parking_lot.find('div', class_='fuellstand')
        try:
            if parking_fuellstand is None:
                parking_state = 'nodata'
            else:
                parking_free = int(parking_fuellstand.text.split()[0])
        except:
            parking_state = 'nodata'

        data["lots"].append({
            "name": parking_name,
            "free": parking_free,
            "total": lot.total,
            "address": lot.address,
            "coords": lot.coords,
            "state": parking_state,
            "lot_type": lot.type,
            "id": lot.id,
            "forecast": False,
        })

    return data
def parse_html(html):
    data = {
        "last_updated": utc_now(),  # not found on site, so we use something else
        "lots": []
    }

    data_json = json.loads(html)
    # iterate over all parking lots in the feed
    for parking_lot in data_json:
        parking_name = parking_lot['title']
        if parking_name != 'Reserve':
            lot = geodata.lot(parking_name)

            parking_free = 0
            try:
                if not parking_lot['isOpened']:
                    parking_status = 'closed'
                else:
                    parking_status = 'open'
                    parking_free = int(parking_lot['free'])
            except:
                parking_status = 'nodata'

            data["lots"].append({
                "name": parking_name,
                "free": parking_free,
                "total": parking_lot['parkings'],
                "address": lot.address,
                "coords": lot.coords,
                "state": parking_status,
                "lot_type": lot.type,
                "id": lot.id,
                "forecast": False
            })

    return data
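# The keys read above (title, isOpened, free, parkings) imply feed entries
# shaped like this. Reconstructed sample, not the real feed:
sample_json = '[{"title": "Parkhaus Mitte", "isOpened": true, "free": 42, "parkings": 350}]'
# parse_html(sample_json) would report one open lot with free = 42 out of 350.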
def get_api_status():
    return jsonify({
        "status": "online",
        "server_time": util.utc_now(),
        "load": getloadavg()
    })
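# With Flask's jsonify this endpoint yields a response body along these lines
# (values illustrative):
#
# {
#   "load": [0.42, 0.35, 0.3],
#   "server_time": "2019-07-04T11:03:00",
#   "status": "online"
# }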
def add_metadata(data):
    """Adds metadata to a scraped output dict"""
    data["last_downloaded"] = util.utc_now()
    return data
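# Typical use is to stamp a scraper's output just before storing or serving it
# (the payload below is made up):
scraped = {"last_updated": "2019-07-04T11:03:00", "lots": []}
payload = add_metadata(scraped)  # payload now also carries "last_downloaded"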