Example #1
def parse_html(html):
    soup = BeautifulSoup(html, "html.parser")

    # last update time (UTC)
    try:
        date_col = soup.select('p > strong')[-1].text
        update_time = convert_date(date_col, "Stand: %d.%m.%Y - %H:%M:%S")
    except ValueError:
        date_col = soup.select('p > strong')[-2].text
        update_time = convert_date(date_col, "Stand: %d.%m.%Y - %H:%M:%S")

    data = {
        "last_updated": update_time,
        "lots": []
    }

    # get all tables with lots
    raw_lot_list = soup.find_all("div", {"class": "listing"})

    # get all lots
    for lot_list in raw_lot_list:
        raw_lots = lot_list.select('tr + tr')

        for lot in raw_lots:
            lot_name = lot.select('a')[0].text

            try:
                lot_free = int(lot.select('td + td')[0].text)
            except ValueError:
                lot_free = 0

            try:
                if "green" in str(lot.select("td + td")[0]):
                    lot_state = "open"
                else:
                    lot_state = "closed"
            except ValueError:
                lot_state = "nodata"

            lot = geodata.lot(lot_name)
            data["lots"].append({
                "name": lot_name,
                "free": lot_free,
                "total": lot.total,
                "coords": lot.coords,
                "state": lot_state,
                "id": lot.id,
                "forecast": False
            })

    return data
Example #2
def parse_html(html):
    soup = BeautifulSoup(html, "html.parser")

    # last update time (UTC)
    try:
        date_col = soup.select('p > strong')[-1].text
        update_time = convert_date(date_col, "Stand: %d.%m.%Y - %H:%M:%S")
    except ValueError:
        date_col = soup.select('p > strong')[-2].text
        update_time = convert_date(date_col, "Stand: %d.%m.%Y - %H:%M:%S")

    data = {"last_updated": update_time, "lots": []}

    # get all tables with lots
    raw_lot_list = soup.find_all("div", {"class": "listing"})

    # get all lots
    for lot_list in raw_lot_list:
        raw_lots = lot_list.select('tr + tr')

        for lot in raw_lots:
            lot_name = lot.select('a')[0].text

            try:
                lot_free = int(lot.select('td + td')[0].text)
            except ValueError:
                lot_free = 0

            try:
                if "green" in str(lot.select("td + td")[0]):
                    lot_state = "open"
                else:
                    lot_state = "closed"
            except ValueError:
                lot_state = "nodata"

            lot = geodata.lot(lot_name)
            data["lots"].append({
                "name": lot_name,
                "free": lot_free,
                "total": lot.total,
                "coords": lot.coords,
                "state": lot_state,
                "id": lot.id,
                "forecast": False
            })

    return data
Example #3
def parse_html(html):
    soup = BeautifulSoup(html, "html.parser")

    data = {
        "last_updated": convert_date(soup.p.string, "(%d.%m.%Y, %H.%M Uhr)"),
        "data_source": data_source,
        "lots": []
    }

    # get all lots
    raw_lots = soup.find_all("tr")

    for lot in raw_lots:
        elements = lot.find_all("td")

        lot_name = elements[0].text

        data["lots"].append({
            "name": lot_name,
            "free": int(elements[1].text),
            "total": data_map.get(lot_name)["total"],
            "type": data_map.get(lot_name)["type"],
            "address": data_map.get(lot_name)["address"],
            "coords": geodata.coords(lot_name),
            "state": "nodata",
            "id": generate_id(__file__, lot_name),
            "forecast": False
        })

    return data
Example #4
def parse_html(html):
    soup = BeautifulSoup(html, "html.parser")

    data = {
        "last_updated": convert_date(soup.p.string, "(%d.%m.%Y, %H.%M Uhr)"),
        "lots": []
    }

    # get all lots
    raw_lots = soup.find_all("tr")

    for raw_lot in raw_lots:
        elements = raw_lot.find_all("td")

        state = "open"
        if "class" in raw_lot.attrs and "strike" in raw_lot["class"]:
            state = "closed"

        lot_name = elements[0].text

        lot = geodata.lot(lot_name)
        data["lots"].append({
            "name": lot.name,
            "free": int(elements[1].text),
            "total": lot.total,
            "lot_type": lot.type,
            "address": lot.address,
            "coords": lot.coords,
            "state": state,
            "id": lot.id,
            "forecast": False
        })

    return data
Example #5
def parse_html(html):
    soup = BeautifulSoup(html, "html.parser")

    lot_table_trs = soup.select("div#parkingList table")[0].find_all("tr")
    date_field = soup.find(id="lastRefresh").text.strip()

    data = {
        "last_updated": convert_date(date_field, "%d.%m.%Y %H:%M Uhr"),
        "lots": []
    }

    for tr in lot_table_trs[1:-1]:
        tds = tr.find_all("td")
        type_and_name = process_name(tds[0].text.strip())
        lot = geodata.lot(tds[0].text.strip())
        data["lots"].append({
            "name": type_and_name[1].strip("\n"),
            "lot_type": type_and_name[0],
            "free": int(tds[1].text),
            "total": lot.total,
            "state": state_map.get(tds[2].text, ""),
            "coords": lot.coords,
            "id": lot.id,
            "forecast": False
        })

    return data
Example #6
def parse_html(html):
    soup = BeautifulSoup(html, "html.parser")

    free_lots = soup.find_all("td", {"class": "stell"})
    assert len(free_lots) == 6, "Expect to find 6 lots in Bonn, got: %d" % len(
        free_lots)
    time = soup.find("td", {"class": "stand"}).text.strip()

    lots = []
    for idx, free in enumerate(free_lots):
        lot = lot_map.get(idx)
        lots.append({
            "name": lot.name,
            "coords": geodata.coords(lot.name),
            "free": int(free.text),
            "address": lot.address,
            "total": lot.total,
            "state": "nodata",
            "id": generate_id(__file__, lot.name),
            "forecast": False
        })

    return {
        "last_updated": convert_date(time, "%d.%m.%y %H:%M:%S"),
        "data_source": data_source,
        "lots": lots
    }
Example #7
def parse_html(html):
    soup = BeautifulSoup(html, "html.parser")

    lot_table_trs = soup.select("div#parkingList table")[0].find_all("tr")
    date_field = soup.find(id="lastRefresh").text.strip()

    data = {
        "last_updated": convert_date(date_field, "%d.%m.%Y %H:%M Uhr"),
        "lots": []
    }

    for tr in lot_table_trs[1:-1]:
        tds = tr.find_all("td")
        type_and_name = process_name(tds[0].text.strip())
        lot = geodata.lot(tds[0].text.strip())
        data["lots"].append({
            "name": type_and_name[1].strip("\n"),
            "lot_type": type_and_name[0],
            "free": int(tds[1].text),
            "total": lot.total,
            "state": state_map.get(tds[2].text, ""),
            "coords": lot.coords,
            "id": lot.id,
            "forecast": False
        })

    return data
Example #8
def parse_html(text_content):

    elems = text_content.split("\r\n\r\n")

    data = {
        "last_updated": convert_date(elems[0], "%d-%m-%Y %H:%M:%S "),
        "lots": []
    }

    for elem in elems[1:]:
        e = {
            "name": elem.split("\r\n")[0].split("=")[1],
            "free": int(elem.split("\r\n")[1].split("=")[1])
        }
        lot = geodata.lot(e["name"])
        data["lots"].append({
            "name": e["name"],
            "free": e["free"],
            "total": lot.total,
            "address": lot.address,
            "coords": lot.coords,
            "state": "unknown",
            "lot_type": lot.type,
            "id": lot.id,
            "forecast": False,
        })

    return data
Example #9
def parse_html(html):
    soup = BeautifulSoup(html, "html.parser")

    free_lots = soup.find_all("td", {"class": "stell"})
    assert len(free_lots) == 6, "Expect to find 6 lots in Bonn, got: %d" % len(free_lots)
    time = soup.find("td", {"class": "stand"}).text.strip()

    lots = []
    for idx, free in enumerate(free_lots):
        lot = lot_map.get(idx)
        lots.append({
            "name": lot.name,
            "coords": geodata.coords(lot.name),
            "free": int(free.text),
            "address": lot.address,
            "total": lot.total,
            "state": "nodata",
            "id": generate_id(__file__, lot.name),
            "forecast": False
        })

    return {
        "last_updated": convert_date(time, "%d.%m.%y %H:%M:%S"),
        "data_source": data_source,
        "lots": lots
    }
Example #10
def parse_html(html):
    soup = BeautifulSoup(html, "html.parser")

    lot_table_trs = soup.select("table[cellpadding=5]")[0].find_all("tr")

    data = {
        "last_updated": convert_date(lot_table_trs[-1].text.strip(), "%d.%m.%Y %H:%M Uhr"),
        "data_source": data_source,
        "lots": []
    }

    for tr in lot_table_trs[1:-1]:
        tds = tr.find_all("td")
        type_and_name = process_name(tds[0].text)
        data["lots"].append({
            "name": type_and_name[1],
            "type": type_and_name[0],
            "free": int(tds[1].text),
            "total": total_number_map.get(tds[0].text, 0),
            "state": state_map.get(tds[2].text, ""),
            "coords": geodata.coords(type_and_name[1]),
            "id": generate_id(__file__, type_and_name[1]),
            "forecast": False
        })

    return data
Example #11
def parse_html(html):
    soup = BeautifulSoup(html, "html.parser")
    data = {
        "lots": [],
        "data_source": data_source,
        "last_updated": convert_date(soup.find(id="P1_LAST_UPDATE").text, "%d.%m.%Y %H:%M:%S")
    }

    for table in soup.find_all("table"):
        if table["summary"] != "":
            region = table["summary"]

            for lot_row in table.find_all("tr"):
                if lot_row.find("th") is not None:
                    continue

                state_div = lot_row.find("div")
                if any(color in state_div["class"]
                       for color in ("green", "yellow", "red")):
                    state = "open"
                elif "park-closed" in state_div["class"]:
                    state = "closed"
                else:
                    state = "nodata"

                lot_name = lot_row.find("td", {"headers": "BEZEICHNUNG"}).text

                try:
                    free = int(lot_row.find("td", {"headers": "FREI"}).text)
                except ValueError:
                    free = 0

                try:
                    total = int(lot_row.find("td", {"headers": "KAPAZITAET"}).text)
                except ValueError:
                    total = get_most_lots_from_known_data("Dresden", lot_name)

                lot_id = generate_id(__file__, lot_name)
                forecast = os.path.isfile("forecast_data/" + lot_id + ".csv")

                data["lots"].append({
                    "coords": geodata.coords(lot_name),
                    "name": lot_name,
                    "total": total,
                    "free": free,
                    "state": state,
                    "id": lot_id,
                    "lot_type": type_map.get(lot_name, ""),
                    "address": address_map.get(lot_name, ""),
                    "forecast": forecast,
                    "region": region
                })

    return data
Example #12
def parse_website_app(html):
    soup = BeautifulSoup(html, "html.parser")
    date_field = soup.find(id="P1_LAST_UPDATE").text
    last_updated = convert_date(date_field, "%d.%m.%Y %H:%M:%S")
    data = {
        "lots": [],
        "last_updated": last_updated
    }

    for table in soup.find_all("table"):
        if table["summary"] != "":
            region = table["summary"]
            if region == "Busparkplätze":
                continue

            for lot_row in table.find_all("tr"):
                if lot_row.find("th") is not None:
                    continue

                cls = lot_row.find("div")["class"]
                state = "nodata"
                if "green" in cls or "yellow" in cls or "red" in cls:
                    state = "open"
                elif "park-closed" in cls:
                    state = "closed"

                lot_name = lot_row.find("td", {"headers": "BEZEICHNUNG"}).text

                try:
                    col = lot_row.find("td", {"headers": "FREI"})
                    free = int(col.text)
                except ValueError:
                    free = 0

                try:
                    col = lot_row.find("td", {"headers": "KAPAZITAET"})
                    total = int(col.text)
                except ValueError:
                    total = get_most_lots_from_known_data("Dresden", lot_name)

                lot = geodata.lot(lot_name)
                forecast = os.path.isfile("forecast_data/" + lot.id + ".csv")

                data["lots"].append({
                    "coords": lot.coords,
                    "name": lot_name,
                    "total": total,
                    "free": free,
                    "state": state,
                    "id": lot.id,
                    "lot_type": lot.type,
                    "address": lot.address,
                    "forecast": forecast,
                    "region": region
                })
    return data
Example #13
def parse_html(html):
    soup = BeautifulSoup(html, "html.parser")

    # last update time (UTC)
    try:
        update_time = convert_date(soup.select('p > strong')[-1].text, "Stand: %d.%m.%Y - %H:%M:%S")
    except ValueError:
        update_time = convert_date(soup.select('p > strong')[-2].text, "Stand: %d.%m.%Y - %H:%M:%S")

    data = {
        "last_updated": update_time,
        "data_source": data_source,
        "lots": []
    }

    # get all tables with lots
    raw_lot_list = soup.find_all("div", {"class": "listing"})

    # get all lots
    for lot_list in raw_lot_list:
        raw_lots = lot_list.select('tr + tr')

        for lot in raw_lots:
            lot_name = lot.select('a')[0].text

            try:
                lot_free = int(lot.select('td + td')[0].text)
                lot_state = "open" if "green" in str(lot.select("td + td")[0]) else "closed"
            except ValueError:
                lot_free = 0
                lot_state = "nodata"

            data["lots"].append({
                "name": lot_name,
                "free": lot_free,
                "total": total_number_map.get(lot_name, 0),
                "coords": geodata.coords(lot_name),
                "state": lot_state,
                "id": generate_id(__file__, lot_name),
                "forecast": False
            })

    return data
Example #14
def parse_html(html):
    soup = BeautifulSoup(html, "html.parser")
    date_field = soup.find(id="P1_LAST_UPDATE").text
    last_updated = convert_date(date_field, "%d.%m.%Y %H:%M:%S")
    data = {
        "lots": [],
        "last_updated": last_updated
    }

    for table in soup.find_all("table"):
        if table["summary"] != "":
            region = table["summary"]

            for lot_row in table.find_all("tr"):
                if lot_row.find("th") is not None:
                    continue

                cls = lot_row.find("div")["class"]
                state = "nodata"
                if "green" in cls or "yellow" in cls or "red" in cls:
                    state = "open"
                elif "park-closed" in cls:
                    state = "closed"

                lot_name = lot_row.find("td", {"headers": "BEZEICHNUNG"}).text

                try:
                    col = lot_row.find("td", {"headers": "FREI"})
                    free = int(col.text)
                except ValueError:
                    free = 0

                try:
                    col = lot_row.find("td", {"headers": "KAPAZITAET"})
                    total = int(col.text)
                except ValueError:
                    total = get_most_lots_from_known_data("Dresden", lot_name)

                lot = geodata.lot(lot_name)
                forecast = os.path.isfile("forecast_data/" + lot.id + ".csv")

                data["lots"].append({
                    "coords": lot.coords,
                    "name": lot_name,
                    "total": total,
                    "free": free,
                    "state": state,
                    "id": lot.id,
                    "lot_type": lot.type,
                    "address": lot.address,
                    "forecast": forecast,
                    "region": region
                })

    return data
Example #15
def parse_html(html):
    # BeautifulSoup is a great and easy way to parse the html and
    # find the bits and pieces we're looking for.
    soup = BeautifulSoup(html, "html.parser")

    # last_updated is the date when the data on the page was last updated
    last_updated = str(soup.select("body"))
    start = str.find(last_updated, "Letzte Aktualisierung:") + 23
    last_updated = last_updated[start:start + 16]

    data = {
        # convert_date is a utility function
        # you can use to turn this date into the correct string format
        "last_updated": convert_date(last_updated, "%d.%m.%Y %H:%M"),
        "lots": []
    }

    for tr in soup.find_all("tr"):
        if tr.td is None:
            continue
        td = tr.findAll('td')
        parking_name = td[0].string
        # work-around for the umlaut problem: ugly but working
        if 'Heiligengeist-' in parking_name:
            parking_name = 'Heiligengeist-Höfe'
        elif 'Schlossh' in parking_name:
            parking_name = 'Schlosshöfe'
        # get the data
        lot = geodata.lot(parking_name)
        try:
            parking_state = 'open'
            parking_free = 0
            if 'Geschlossen' in td[3].text:
                parking_state = 'closed'
            else:
                parking_free = int(td[1].text)
        except (IndexError, ValueError):
            parking_state = 'nodata'

        data["lots"].append({
            "name":     parking_name,
            "free":     parking_free,
            "total":    lot.total,
            "address":  lot.address,
            "coords":   lot.coords,
            "state":    parking_state,
            "lot_type": lot.type,
            "id":       lot.id,
            "forecast": False
        })

    return data
Example #16
def parse_html(html):
    soup = BeautifulSoup(html, "html.parser")

    data = {
        "last_updated": convert_date(soup.find("tr").find("strong").text, "Stand: %d.%m.%Y, %H:%M Uhr"),
        "data_source": data_source,
        "lots": []
    }

    rows = soup.find_all("tr")
    rows = rows[1:]
    region_header = ""

    for row in rows:
        if len(row.find_all("th")) > 0:
            # This is a header row, save it for later
            region_header = row.find("th", {"class": "head1"}).text

        else:
            if row.find("td").text == "Gesamt":
                continue

            # This is a parking lot row
            raw_lot_data = row.find_all("td")

            if len(raw_lot_data) == 2:
                type_and_name = process_name(raw_lot_data[0].text)
                data["lots"].append({
                    "name": type_and_name[1],
                    "type": type_and_name[0],
                    "total": get_most_lots_from_known_data("Lübeck", type_and_name[1]),
                    "free": 0,
                    "region": region_header,
                    "state": process_state_map.get(raw_lot_data[1].text, ""),
                    "coords": geodata.coords(type_and_name[1]),
                    "id": generate_id(__file__, type_and_name[1]),
                    "forecast": False
                })

            elif len(raw_lot_data) == 4:
                type_and_name = process_name(raw_lot_data[0].text)
                data["lots"].append({
                    "name": type_and_name[1],
                    "type": type_and_name[0],
                    "total": int(raw_lot_data[1].text),
                    "free": int(raw_lot_data[2].text),
                    "region": region_header,
                    "state": "open",
                    "coords": geodata.coords(type_and_name[1]),
                    "id": generate_id(__file__, type_and_name[1]),
                    "forecast": False
                })

    return data
Example #17
def parse_html(html):

    # this scraper ignores the passed html and reads the JSON feed directly:
    #      URL (removed: + no caching)
    urlHD = "http://parken.heidelberg.de/api-v1/parking-location?api_key=H5WaIyR4lgn6wzo7rJf8u4ubecgpX0Q8"  # "&nc="+str(random.random())
    headerHD = {'Accept': 'application/json; charset=utf-8',
                'User-Agent': 'ParkAPI v%s - Info: %s' % (env.SERVER_VERSION, env.SOURCE_REPOSITORY)}
    req = urllib.request.Request(url=urlHD, headers=headerHD)
    webURL = urllib.request.urlopen(req)
    data = webURL.read()
    dataJSON = json.loads(data.decode('utf-8'))

    data = {
        # convert_date is a utility function you can use to turn this date into the correct string format
        "last_updated": convert_date(dataJSON['data']['updated'].split("+")[0][:-1], '%a, %d %b %Y %H:%M:%S'),
        "lots": []
    }

    # iteration over single parking_lots
    for parking_lot in dataJSON['data']['parkinglocations']:
        # please keep the name in the geojson-file in the same form as delivered here (including spaces)
        parking_name = 'P'+str(parking_lot['uid'])+' '+parking_lot['name']
        # get the data
        lot = geodata.lot(parking_name)

        parking_state = 'open'
        parking_free = 0
        try:
            if parking_lot['parkingupdate']['status'] == 'closed':
                parking_state = 'closed'
            else:
                parking_free = int(parking_lot['parkingupdate']['total']) - int(parking_lot['parkingupdate']['current'])
        except (KeyError, ValueError, TypeError):
            parking_state = 'nodata'

        data["lots"].append({
            "name": parking_name,
            "free": parking_free,
            "total": lot.total,
            "address": lot.address,
            "coords": lot.coords,
            "state": parking_state,
            "lot_type": lot.type,
            "id": lot.id,
            "forecast": False,
        })

    return data
Example #18
def parse_html(html):
    data = json.loads(html)
    lots = {"lots": [], "last_updated": None}
    id_lots = {}
    for l in geodata.lots:
        aux = json.loads(geodata.lots[l].aux)
        id_lots[aux["identifier"]] = {
            "lot": geodata.lots[l],
            "open": aux["open"]
        }
    timestamps = []
    for feature in data["features"]:
        try:
            if id_lots[feature["attributes"]["IDENTIFIER"]]["open"]:
                state = "open"
            else:
                if feature["attributes"]["KAPAZITAET"] == -1:
                    state = "nodata"
                else:
                    state = "unknown"
            lot = id_lots[feature["attributes"]["IDENTIFIER"]]["lot"]
            lots["lots"].append({
                "coords":
                lot.coords,
                "name":
                lot.name,
                "total":
                int(lot.total),
                "free":
                int(feature["attributes"]["KAPAZITAET"]),
                "state":
                state,
                "id":
                lot.id,
                "lot_type":
                lot.type,
                "address":
                lot.address,
                "forecast":
                False,
                "region":
                ""
            })
            timestamps.append(
                convert_date(feature["attributes"]["TIMESTAMP"],
                             "%Y-%m-%d %H:%M:%S"))
        except (KeyError, ValueError):
            pass
    lots["last_updated"] = max(timestamps)
    return lots
Example #19
def parse_html(html):

    # BeautifulSoup is a great and easy way to parse the html and find the bits and pieces we're looking for.
    soup = BeautifulSoup(html, "html.parser")

    data = {
        "last_updated": '',  # will fill this later
        "lots": []
    }

    #                                                                        Datum: 22.07.2019 - Uhrzeit: 16:57
    data['last_updated'] = convert_date(
        soup.find('div', class_='col-sm-12').text,
        'Datum: %d.%m.%Y - Uhrzeit: %H:%M')

    parking_lots = soup.find_all('div', class_='row carparkContent')
    for one_parking_lot in parking_lots:
        park_temp1 = one_parking_lot.find('div', class_='carparkLocation col-sm-9')
        park_temp2 = park_temp1.find('a')
        if park_temp2 is not None:
            parking_name = park_temp2.text
        else:
            parking_name = park_temp1.text.strip()
        lot = geodata.lot(parking_name)

        parking_free = 0
        parking_state = 'open'
        try:
            # text: Freie Parkplätze: 195
            parking_free_temp = one_parking_lot.find(
                'div', class_='col-sm-5').text.split()
            # parking_free_temp: ['Freie', 'Parkplätze:', '195']
            parking_free = int(parking_free_temp[2])
        except (AttributeError, IndexError, ValueError):
            parking_state = 'nodata'

        data["lots"].append({
            "name": parking_name,
            "free": parking_free,
            "total": lot.total,
            "address": lot.address,
            "coords": lot.coords,
            "state": parking_state,
            "lot_type": lot.type,
            "id": lot.id,
            "forecast": False
        })

    return data
Example #20
def parse_html(html):
    soup = BeautifulSoup(html, "html.parser")

    date_field = soup.find("tr").find("strong").text
    last_updated = convert_date(date_field, "Stand: %d.%m.%Y, %H:%M Uhr")
    data = {
        "last_updated": last_updated,
        "lots": []
    }

    rows = soup.find_all("tr")
    rows = rows[1:]
    region_header = ""

    for row in rows:
        if len(row.find_all("th")) > 0:
            # This is a header row, save it for later
            region_header = row.find("th", {"class": "head1"}).text
        else:
            if row.find("td").text == "Gesamt":
                continue

            # This is a parking lot row
            raw_lot_data = row.find_all("td")

            type_and_name = process_name(raw_lot_data[0].text)

            if len(raw_lot_data) == 2:
                total = get_most_lots_from_known_data("Lübeck",
                                                      type_and_name[1])
                free = 0
                state = process_state_map.get(raw_lot_data[1].text, "")
            elif len(raw_lot_data) == 4:
                total = int(raw_lot_data[1].text)
                free = int(raw_lot_data[2].text)
                state = "open"
            else:
                # skip rows with an unexpected layout instead of
                # appending undefined values below
                continue

            lot = geodata.lot(type_and_name[1])
            data["lots"].append({
                "name": lot.name,
                "lot_type": type_and_name[0],
                "total": total,
                "free": free,
                "region": region_header,
                "state": state,
                "coords": lot.coords,
                "id": lot.id,
                "forecast": False
            })

    return data
Example #21
def parse_html(html):

    # BeautifulSoup is a great and easy way to parse the html and find the bits and pieces we're looking for.
    soup = BeautifulSoup(html, "html.parser")

    data = {
        "last_updated": '',
        "lots": []
    }

    try:
        # <div class="container-fluid">
        parking_data = soup.find('div', class_='container-fluid')
        # Letzte Aktualisierung: 04.07.2019 11:03:00
        last_updated = convert_date(
            parking_data.find('h5').text,
            'Letzte Aktualisierung: %d.%m.%Y %H:%M:%S')
        data["last_updated"] = last_updated
    except (AttributeError, ValueError):
        # if the service is unavailable (did happen in one of my tests),
        # report the current time and bail out
        data["last_updated"] = utc_now()
        return data

    parking_lots = parking_data.find_all('div', class_='well')
    for one_parking_lot in parking_lots:
        parking_name = one_parking_lot.find('b').text.strip()
        lot = geodata.lot(parking_name)
        parking_free = 0
        try:
            parking_status = 'open'
            parking_free = int(
                one_parking_lot.find_all(
                    'div', role='progressbar')[1].find('b').text.strip())
        except (AttributeError, IndexError, ValueError):
            parking_status = 'nodata'

        data["lots"].append({
            "name": parking_name,
            "free": parking_free,
            "total": lot.total,
            "address": lot.address,
            "coords": lot.coords,
            "state": parking_status,
            "lot_type": lot.type,
            "id": lot.id,
            "forecast": False
        })

    return data
Example #22
def parse_html(html):

    # BeautifulSoup is a great and easy way to parse the html and find the bits and pieces we're looking for.
    soup = BeautifulSoup(html, "html.parser")

    data = {
        "last_updated": '',
        "lots": []
    }

    #                                                       <b>Stand: 13.08.2019 16:40:00 Uhr</b> (Aktualisierung alle 60 Sekunden)<br>
    data['last_updated'] = convert_date(
        soup.find('b').text, 'Stand: %d.%m.%Y %H:%M:%S Uhr')

    entries = soup.find('table', class_='tabellenformat')
    entries_rows = entries.find_all('tr')
    # first line: header
    for one_entry in entries_rows[1:]:
        one_entry_data = one_entry.find_all('td')
        parking_name = one_entry_data[0].text
        lot = geodata.lot(parking_name)

        parking_free = 0
        parking_total = 0
        try:
            parking_total = int(one_entry_data[1].text)
            if one_entry_data[5].text == 'Offen':
                parking_status = 'open'
                parking_free = int(one_entry_data[3].text)
            elif one_entry_data[5].text == 'Geschlossen':
                parking_status = 'closed'
            else:
                parking_status = 'nodata'
        except (IndexError, ValueError):
            parking_status = 'nodata'

        data["lots"].append({
            "name": parking_name,
            "free": parking_free,
            "total": parking_total,
            "address": lot.address,
            "coords": lot.coords,
            "state": parking_status,
            "lot_type": lot.type,
            "id": lot.id,
            "forecast": False
        })

    return data
Example #23
def parse_website(html):
    soup = BeautifulSoup(html, "html.parser")
    last_updated = None
    for h3 in soup.find_all("h3"):
        if h3.text == "Letzte Aktualisierung":
            last_updated = convert_date(h3.find_next_sibling("div").text, "%d.%m.%Y %H:%M:%S")
    data = {
        "lots": [],
        "last_updated": last_updated
    }
    for table in soup.find_all("table"):
        thead = table.find("thead")
        if not thead:
            continue
        region = table.find("thead").find("tr").find_all("th")[1].find("div").text
        if region == "Busparkplätze":
            continue
        for tr in table.find("tbody").find_all("tr"):
            td = tr.find_all("td")
            name = tr.find("a").text
            lot = geodata.lot(name)
            try:
                total = int(td[2].find_all("div")[1].text)
            except ValueError:
                total = get_most_lots_from_known_data("Dresden", name)
            try:
                free = int(td[3].find_all("div")[1].text)
                valid_free = True
            except ValueError:
                valid_free = False
                free = 0
            if "park-closed" in td[0]["class"]:
                state = "closed"
            elif "blue" in td[0]["class"] and not valid_free:
                state = "nodata"
            else:
                state = "open"
            data["lots"].append({
                "coords": lot.coords,
                "name": name,
                "total": total,
                "free": free,
                "state": state,
                "id": lot.id,
                "lot_type": lot.type,
                "address": lot.address,
                "forecast": os.path.isfile("forecast_data/" + lot.id + ".csv"),
                "region": region
            })
    return data
Example #24
def parse_html(html):
    soup = BeautifulSoup(html, "html.parser")

    date_field = soup.find("tr").find("strong").text
    last_updated = convert_date(date_field, "Stand: %d.%m.%Y, %H:%M Uhr")
    data = {"last_updated": last_updated, "lots": []}

    rows = soup.find_all("tr")
    rows = rows[1:]
    region_header = ""

    for row in rows:
        if len(row.find_all("th")) > 0:
            # This is a header row, save it for later
            region_header = row.find("th", {"class": "head1"}).text
        else:
            if row.find("td").text == "Gesamt":
                continue

            # This is a parking lot row
            raw_lot_data = row.find_all("td")

            type_and_name = process_name(raw_lot_data[0].text)

            if len(raw_lot_data) == 2:
                total = get_most_lots_from_known_data("Lübeck",
                                                      type_and_name[1])
                free = 0
                state = process_state_map.get(raw_lot_data[1].text, "")
            elif len(raw_lot_data) == 4:
                total = int(raw_lot_data[1].text)
                free = int(raw_lot_data[2].text)
                state = "open"
            else:
                # skip rows with an unexpected layout instead of
                # appending undefined values below
                continue

            lot = geodata.lot(type_and_name[1])
            data["lots"].append({
                "name": lot.name,
                "lot_type": type_and_name[0],
                "total": total,
                "free": free,
                "region": region_header,
                "state": state,
                "coords": lot.coords,
                "id": lot.id,
                "forecast": False
            })

    return data
Example #25
def parse_html(source_json):

    parsed_json = json.loads(source_json)
    features = parsed_json['features']

    # last_updated is the date when the data on the page was last updated, it should be listed on most pages
    last_updated = ""

    data = {
        "lots": []
    }

    for feature in features:
        lot_name = feature['properties']['park_name']
        lot_free = int(feature['properties']['obs_free'])
        lot_total = int(feature['properties']['obs_max'])

        obs_ts = feature['properties']['obs_ts'].split('.')[0]
        if last_updated < obs_ts:
            last_updated = obs_ts

        # please be careful about the state only being allowed to contain either open, closed or nodata
        # should the page list other states, please map these into the three listed possibilities
        state = "nodata"

        if feature['properties']['obs_state'] == "1":
            state = "open"
        elif feature['properties']['obs_state'] == "0":
            state = "closed"

        lot = geodata.lot(lot_name)
        data["lots"].append({
            "name": lot.name,
            "free": lot_free,
            "total": lot_total,
            "address": lot.address,
            "coords": lot.coords,
            "state": state,
            "lot_type": lot.type,
            "id": lot.id,
            "forecast": False,
        })

    data['last_updated'] = convert_date(last_updated, "%Y-%m-%d %H:%M:%S")

    return data
Example #26
def parse_html(html):

    # BeautifulSoup is a great and easy way to parse the html and find the bits and pieces we're looking for.
    soup = BeautifulSoup(html, "html.parser")

    data = {
        "last_updated": '',  # will fill this later
        "lots": []
    }

    # look for: <div id="parkhausliste-ct">
    div_level1 = soup.find('div', id='parkhausliste-ct')
    # <p style="color: #7a7a7b; padding: 18px 0 8px 0">zuletzt aktualisiert am 19.06.2019, 15:27 Uhr</p>
    date_time = div_level1.find('p')
    data['last_updated'] = convert_date(
        date_time.text, 'zuletzt aktualisiert am %d.%m.%Y, %H:%M Uhr')

    # find all entries:
    div_level2 = div_level1.find('div')
    div_level3 = div_level2.find_all('div')
    count = 0
    while count < len(div_level3) - 2:
        parking_name = div_level3[count + 1].text.strip()
        lot = geodata.lot(parking_name)
        parking_free = 0
        parking_state = 'open'
        try:
            parking_free = int(div_level3[count + 2].text)
        except (IndexError, ValueError):
            parking_state = 'nodata'
        count += 3

        data["lots"].append({
            "name": parking_name,
            "free": parking_free,
            "total": lot.total,
            "address": lot.address,
            "coords": lot.coords,
            "state": parking_state,
            "lot_type": lot.type,
            "id": lot.id,
            "forecast": False
        })

    return data
Example #27
def parse_html(html):

    dataJSON = json.loads(html)

    data = {
        # convert_date is a utility function you can use to turn this date into the correct string format
        "last_updated": convert_date(dataJSON['data']['updated'].split("+")[0][:-1],
                                     '%a, %d %b %Y %H:%M:%S'),
        "lots": []
    }

    # iteration over single parking_lots
    for parking_lot in dataJSON['data']['parkinglocations']:
        # please keep the name in the geojson-file in the same form as delivered here (including spaces)
        parking_name = 'P' + str(parking_lot['uid']) + ' ' + parking_lot['name']
        # get the data
        lot = geodata.lot(parking_name)

        parking_state = 'open'
        parking_free = 0
        try:
            if parking_lot['parkingupdate']['status'] == 'closed':
                parking_state = 'closed'
            else:
                parking_free = int(
                    parking_lot['parkingupdate']['total']) - int(
                        parking_lot['parkingupdate']['current'])
        except (KeyError, ValueError, TypeError):
            parking_state = 'nodata'

        data["lots"].append({
            "name": parking_name,
            "free": parking_free,
            "total": lot.total,
            "address": lot.address,
            "coords": lot.coords,
            "state": parking_state,
            "lot_type": lot.type,
            "id": lot.id,
            "forecast": False,
        })

    return data
Example #28
def parse_html(source_json):

    parsed_json = json.loads(source_json)
    features = parsed_json['features']

    # last_updated is the date when the data on the page was last updated, it should be listed on most pages
    last_updated = ""

    data = {
        "lots": []
    }

    for feature in features:
        lot_name = feature['properties']['park_name']
        lot_free = int(feature['properties']['obs_free'])
        lot_total = int(feature['properties']['obs_max'])

        if last_updated < feature['properties']['obs_ts']:
            last_updated = feature['properties']['obs_ts']

        # please be careful about the state only being allowed to contain either open, closed or nodata
        # should the page list other states, please map these into the three listed possibilities
        state = "nodata"

        if feature['properties']['obs_state'] == "1":
            state = "open"
        elif feature['properties']['obs_state'] == "0":
            state = "closed"

        lot = geodata.lot(lot_name)
        data["lots"].append({
            "name": lot.name,
            "free": lot_free,
            "total": lot_total,
            "address": lot.address,
            "coords": lot.coords,
            "state": state,
            "lot_type": lot.type,
            "id": lot.id,
            "forecast": False,
        })

    data['last_updated'] = convert_date(last_updated, "%Y-%m-%d %H:%M:%S")

    return data
Example #29
def parse_html(text_content):
    data_as_json = json.loads(text_content)

    # more data about the available parking spaces can be found at
    # http://odensedataplatform.dk/dataset/parkering

    # the service doesn't actually publish the last date it was updated,
    # so we will assume the data has just been updated
    last_updated = datetime.now().strftime("%d.%m.%Y %H:%M")
    data = {
        # convert_date is a utility function you can use to turn this date into the correct string format
        "last_updated": convert_date(last_updated, "%d.%m.%Y %H:%M"),
        "lots": []
    }

    for _, parking in data_as_json.items():
        lot_code = parking["idName"]
        name = parking["name"]
        total = parking["maxCount"]
        free = parking["freeCount"]

        # this feed does not publish an open/closed state for the lots
        state = "unknown"

        lot = geodata.lot(lot_code)

        # this is to make sure that we don't include erroneous totals from the JSON file,
        # see the parking filosoffen_q_park_(ski_data) which outputs a total of 9999
        if lot.total < total:
            total = lot.total

        data["lots"].append({
            "name": name,
            "free": free,
            "total": total,
            "address": None,
            "coords": lot.coords,
            "state": state,
            "lot_type": lot.type,
            "id": lot.id,
            "forecast": False,
        })

    return data
Example #30
def parse_html(text_content):
    # the original JSON is invalid, let's fix it
    p = re.compile(r'([^"]|\s)([a-zA-Z]+)\s?(:)')
    text_content = text_content.replace("'", "\"")
    text_content = re.sub(p, r'\1"\2"\3', text_content)
    data_as_json = json.loads(text_content)

    # the source doesn't publish the update time, so we assume present
    last_updated = datetime.now().strftime("%Y/%m/%d %H:%M:%S")
    data = {
        "last_updated": convert_date(last_updated, "%Y/%m/%d %H:%M:%S"),
        "lots": []
    }

    state_mappings = {
        1: "open",
        0: "closed"
    }

    for record in data_as_json["parkPlacesAreaMarkers"]:
        lot_name = record["Name"]
        free = int(record["FreeCount"])
        total = int(record["MaxCount"])

        # the JSON file also contains parking lots for which counting does
        # not work (total == 0); skip those
        if total > 0:
            latitude = record["Latitude"]
            longitude = record["Longitude"]
            state_key = int(record["IsOpen"])
            state = state_mappings[state_key]

            lot = geodata.lot(lot_name)
            data["lots"].append({
                "name": lot_name,
                "free": free,
                "total": total,
                "address": lot.address,
                "coords": lot.coords,
                "state": state,
                "lot_type": lot.type,
                "id": lot.id,
                "forecast": False,
            })

    return data
Example #31
def parse_html(html):

    # BeautifulSoup is a great and easy way to parse the html and find the bits and pieces we're looking for.
    soup = BeautifulSoup(html, "html.parser")

    # last_updated is the date when the data on the page was last updated, it should be listed on most pages
    #   look for: <p class="updateinfo">zuletzt aktualisiert: 28.05.2019 15.30 Uhr</p>
    updated = soup.find("p", class_="updateinfo")
    last_updated = convert_date(updated.text,
                                'zuletzt aktualisiert: %d.%m.%Y %H.%M Uhr')

    data = {
        "last_updated": last_updated,
        # URL for the page where the scraper can gather the data
        "lots": []
    }

    parking_lots = soup.find_all("div", class_="accordeon parkmoeglichkeit")
    for one_lot in parking_lots:
        parking_name = one_lot.find("h3").text
        lot = geodata.lot(parking_name)

        parking_state = 'open'
        parking_free = 0
        try:
            parking_belegung = one_lot.find("div", class_="belegung")
            if parking_belegung is not None:
                parking_free = int(parking_belegung.find("strong").text)
        except (AttributeError, ValueError):
            parking_state = 'nodata'

        data["lots"].append({
            "name": lot.name,
            "free": parking_free,
            "total": lot.total,
            "address": lot.address,
            "coords": lot.coords,
            "state": parking_state,
            "lot_type": lot.type,
            "id": lot.id,
            "forecast": False,
        })

    return data
Example #32
def parse_html(html):

    # BeautifulSoup is a great and easy way to parse the html and find the bits and pieces we're looking for.
    soup = BeautifulSoup(html, "html.parser")

    # last_updated is the date when the data on the page was last updated, it should be listed on most pages
    last_updated = soup.find('h2').text

    data = {
        # convert_date is a utility function you can use to turn this date into the correct string format
        #                                          Stand: 07.06.2019 15:46 Uhr
        "last_updated": convert_date(last_updated, "Stand: %d.%m.%Y %H:%M Uhr"),
        # URL for the page where the scraper can gather the data
        "lots": []
    }

    # find all entries
    all_parking_lots = soup.find_all('dl')
    for parking_lot in all_parking_lots:
        parking_name = parking_lot.find('dt').text
        lot = geodata.lot(parking_name)

        try:
            parking_state = 'open'
            parking_free = int(parking_lot.find('dd').find('strong').text)
        except (AttributeError, ValueError):
            parking_state = 'nodata'
            parking_free = 0

        data["lots"].append({
                "name":     parking_name,
                "free":     parking_free,
                "total":    lot.total,
                "address":  lot.address,
                "coords":   lot.coords,
                "state":    parking_state,
                "lot_type": lot.type,
                "id":       lot.id,
                "forecast": False,
            })

    return data
Example #33
def parse_html(text_content):
    # the original JSON is invalid, let's fix it
    p = re.compile(r'([^"]|\s)([a-zA-Z]+)\s?(:)')
    text_content = text_content.replace("'", "\"")
    text_content = re.sub(p, r'\1"\2"\3', text_content)
    data_as_json = json.loads(text_content)

    # the source doesn't publish the update time, so we assume present
    last_updated = datetime.now().strftime("%Y/%m/%d %H:%M:%S")
    data = {
        "last_updated": convert_date(last_updated, "%Y/%m/%d %H:%M:%S"),
        "lots": []
    }

    state_mappings = {1: "open", 0: "closed"}

    for record in data_as_json["parkPlacesAreaMarkers"]:
        lot_name = record["Name"]
        free = int(record["FreeCount"])
        total = int(record["MaxCount"])

        # the JSON file also contains parking lots for which counting does
        # not work (total == 0); skip those
        if total > 0:
            latitude = record["Latitude"]
            longitude = record["Longitude"]
            state_key = int(record["IsOpen"])
            state = state_mappings[state_key]

            lot = geodata.lot(lot_name)
            data["lots"].append({
                "name": lot_name,
                "free": free,
                "total": total,
                "address": lot.address,
                "coords": lot.coords,
                "state": state,
                "lot_type": lot.type,
                "id": lot.id,
                "forecast": False,
            })

    return data
Example #34
def parse_html(html):

    # BeautifulSoup is a great and easy way to parse the html and find the bits and pieces we're looking for.
    soup = BeautifulSoup(html, "html.parser")

    # last_updated is the date when the data on the page was last updated, it should be listed on most pages
    last_updated = soup.select("p#last_updated")[0].text

    data = {
        # convert_date is a utility function you can use to turn this date into the correct string format
        "last_updated": convert_date(last_updated, "%d.%m.%Y %H:%M Uhr"),
        "data_source": data_source,
        "lots": []
    }

    for tr in soup.find_all("tr"):
        lot_name = tr.find("td", {"class": "lot_name"}).text
        lot_free = int(tr.find("td", {"class": "lot_free"}).text)
        lot_total = int(tr.find("td", {"class": "lot_total"}).text)
        lot_address = tr.find("td", {"class": "lot_address"}).text
        lot_type = tr.find("td", {"class": "lot_type"}).text

        # please be careful about the state only being allowed to contain either open, closed or nodata
        # should the page list other states, please map these into the three listed possibilities
        state = tr.find("td", {"class": "lot_state"}).text

        data["lots"].append({
            "name": lot_name,
            "free": lot_free,
            "total": lot_total,
            "address": lot_address,
            "coords": geodata.coords(lot_name),
            "state": state,
            "type": lot_type,
            # use the utility function generate_id to generate an ID for this lot
            # it takes this file path and the lot's name as params
            "id": generate_id(__file__, lot_name),
            "forecast": False,
        })

    return data
Example #35
def parse_html(html):

    # BeautifulSoup is a great and easy way to parse the html and find the bits and pieces we're looking for.
    soup = BeautifulSoup(html, "html.parser")

    # last_updated is the date when the data on the page was last updated, it should be listed on most pages
    last_updated = soup.select("p#last_updated")[0].text

    data = {
        # convert_date is a utility function you can use to turn this date into the correct string format
        "last_updated": convert_date(last_updated, "%d.%m.%Y %H:%M Uhr"),
        "data_source": data_source,
        "lots": []
    }

    for tr in soup.find_all("tr"):
        lot_name = tr.find("td", {"class": "lot_name"}).text
        lot_free = int(tr.find("td", {"class": "lot_free"}).text)
        lot_total = int(tr.find("td", {"class": "lot_total"}).text)
        lot_address = tr.find("td", {"class": "lot_address"}).text
        lot_type = tr.find("td", {"class": "lot_type"}).text

        # please be careful about the state only being allowed to contain either open, closed or nodata
        # should the page list other states, please map these into the three listed possibilities
        state = tr.find("td", {"class": "lot_state"}).text

        data["lots"].append({
            "name": lot_name,
            "free": lot_free,
            "total": lot_total,
            "address": lot_address,
            "coords": geodata.coords(lot_name),
            "state": state,
            "type": lot_type,
            # use the utility function generate_id to generate an ID for this lot
            # it takes this file path and the lot's name as params
            "id": generate_id(__file__, lot_name),
            "forecast": False,
        })

    return data
Example #36
def parse_html(html):

    # BeautifulSoup is a great and easy way to parse the html and find the bits and pieces we're looking for.
    soup = BeautifulSoup(html, "html.parser")

    # last_updated is the date when the data on the page was last updated, it should be listed on most pages
    last_updated = soup.select("p#last_updated")[0].text

    data = {
        # convert_date is a utility function you can use to turn this date into the correct string format
        "last_updated": convert_date(last_updated, "%d.%m.%Y %H:%M Uhr"),
        "lots": []
    }

    for tr in soup.find_all("tr"):
        lot_name = tr.find("td", {"class": "lot_name"}).text
        lot_free = int(tr.find("td", {"class": "lot_free"}).text)
        lot_total = int(tr.find("td", {"class": "lot_total"}).text)

        # please be careful about the state only being allowed to contain either open, closed or nodata
        # should the page list other states, please map these into the three listed possibilities
        state = tr.find("td", {"class": "lot_state"}).text

        lot = geodata.lot(lot_name)
        data["lots"].append({
            "name": lot.name,
            "free": lot_free,
            "total": lot_total,
            "address": lot.address,
            "coords": lot.coords,
            "state": state,
            "lot_type": lot.type,
            "id": lot.id,
            "forecast": False,
        })

    return data
Example #37
def parse_html(html):
    # the data is embedded in JavaScript calls, so regexes are used
    # instead of BeautifulSoup
    m = re.findall(r'Ob(.*von.*);', html)
    time = str(datetime.datetime.now())

    assert len(m) == 8, \
        "Expect to find 8 lots in Erfurt, got: %d" % len(m)

    # each match carries the lot name, the occupied count and, after "von",
    # the total; the lots appear in a fixed page order, which lot_map maps
    # to the geodata names
    parsed = []
    for elem in m:
        name = re.search(r'"[ A-Za-zöüä\xfc0-9+-]+"', elem,
                         re.UNICODE).group(0)[1:-1]
        occupied = re.search(r'[\-0-9]+\)', elem).group(0)[0:-1]
        total = re.search(r'von.*,', elem).group(0)[5:-1]
        parsed.append({"name": name, "free": int(total) - int(occupied)})

    lots = []
    for idx, entry in enumerate(parsed):
        lot = geodata.lot(lot_map[idx])
        lots.append({
            "name": lot.name,
            "coords": lot.coords,
            "free": entry["free"],
            "address": lot.address,
            "total": lot.total,
            "state": "nodata",
            "id": lot.id,
            "forecast": False
        })

    return {
        "last_updated": convert_date(time.split('.')[0], "%Y-%m-%d %H:%M:%S"),
        "lots": lots
    }
Example #38
def parse_html(text_content):
    data_as_json = json.loads(text_content)

    # last_updated is the date when the data on the page was last updated, it should be listed on most pages
    last_updated = data_as_json["result"]["records"][0]["date"]
    data = {
        "last_updated": convert_date(last_updated, "%Y/%m/%d %H:%M:%S"),
        "lots": []
    }

    # The page at https://www.odaa.dk/dataset/parkeringshuse-i-aarhus describes how the counts are made
    map_json_names = {
        "NORREPORT": "Nørreport",
        # "SKOLEBAKKEN": None,
        "SCANDCENTER": "Scandinavian Center",
        "BRUUNS": "Bruuns Galleri",
        "MAGASIN": "Magasin",
        "KALKVAERKSVEJ": "Kalkværksvej",
        "SALLING": "Salling",
        "Navitas": "Navitas",
        "NewBusgadehuset": "Busgadehuset"
    }

    cumulatives = {"Urban Level 1": "Dokk1", "Urban Level 2+3": "Dokk1"}

    cumulative_lots = {}

    for record in data_as_json["result"]["records"]:
        lot_code = record["garageCode"]
        total = int(record["totalSpaces"])
        free = max(int(record["totalSpaces"]) - int(record["vehicleCount"]), 0)

        if lot_code in map_json_names:
            lot_name = map_json_names[lot_code]
            lot = geodata.lot(lot_name)
            data["lots"].append({
                "name": lot_name,
                "free": free,
                "total": total,
                "address": lot.address,
                "coords": lot.coords,
                "state": "unknown",
                "lot_type": lot.type,
                "id": lot.id,
                "forecast": False,
            })
        elif lot_code in cumulatives:
            # several records are summed up into a single lot (e.g. Dokk1)
            lot_name = cumulatives[lot_code]
            lot = geodata.lot(lot_name)
            if lot_name not in cumulative_lots:
                cumulative_lots[lot_name] = {
                    "name": lot_name,
                    "free": free,
                    "total": total,
                    "address": lot.address,
                    "coords": lot.coords,
                    "state": "unknown",
                    "lot_type": lot.type,
                    "id": lot.id,
                    "forecast": False,
                }
            else:
                cumulative_lots[lot_name]["free"] += free
                cumulative_lots[lot_name]["total"] += total

    for lot_data in cumulative_lots.values():
        data["lots"].append(lot_data)

    return data
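The cumulative branch above folds the two Dokk1 garage levels into a single lot entry. A stripped-down, standalone demonstration of that summing pattern, using made-up record values:

# made-up records: two levels of the same physical lot
records = [
    {"garageCode": "Urban Level 1", "totalSpaces": "300", "vehicleCount": "120"},
    {"garageCode": "Urban Level 2+3", "totalSpaces": "700", "vehicleCount": "650"},
]
cumulatives = {"Urban Level 1": "Dokk1", "Urban Level 2+3": "Dokk1"}

summed = {}
for record in records:
    name = cumulatives[record["garageCode"]]
    total = int(record["totalSpaces"])
    free = max(total - int(record["vehicleCount"]), 0)
    entry = summed.setdefault(name, {"name": name, "free": 0, "total": 0})
    entry["free"] += free
    entry["total"] += total

print(summed)  # {'Dokk1': {'name': 'Dokk1', 'free': 230, 'total': 1000}}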
Example #39
def parse_html(html):
    soup = BeautifulSoup(html, "html.parser")

    stand = soup.select('span')
    # this gives you:
    # in stand[0]: <span style="font-weight: normal; letter-spacing: 0px;">
    #              Stand: 10.04.2019 15:09        </span>
    # splitting it gives you: u'10.04.2019', u'15:09'
    # putting them back together: u'10.04.2019 15:09'
    last_updated_date = stand[0].text.strip().split()[1]
    last_updated_time = stand[0].text.strip().split()[2]
    last_updated = last_updated_date + " " + last_updated_time

    data = {
        "last_updated": convert_date(last_updated, "%d.%m.%Y %H:%M"),
        "lots": []
    }

    # everything is in table-objects
    table = soup.select('table')
    # table[0] is a big table-object around everything
    # table[1] contains some headers
    # table[2] contains column-headers and one row for each parking-lot
    #          so we look in this for name and values
    td = table[2].find_all('td')
    i = 0
    while i < len(td) - 4:
        # for each row
        #    td[0] contains an image
        #    td[1] contains the name of the parking-lot
        #    td[2] contains the text 'geschlossen' or the values in the form xxx / xxx
        parking_name = td[i + 1].text.strip()
        # work-around for the ß (eszett) problem: Coulinstraße
        if 'Coulinstr' in parking_name:
            parking_name = 'Coulinstraße'
        # get the data
        lot = geodata.lot(parking_name)
        try:
            parking_state = 'open'
            parking_free = 0
            parking_total = 0
            if 'geschlossen' in td[i + 2].text:
                parking_state = 'closed'
            else:
                parking_free = int(td[i + 2].text.split()[0])
                parking_total = int(td[i + 2].text.split()[2])
        except (ValueError, IndexError):
            parking_state = 'nodata'

        data["lots"].append({
            "name":     parking_name,
            "free":     parking_free,
            "total":    parking_total,
            "address":  lot.address,
            "coords":   lot.coords,
            "state":    parking_state,
            "lot_type": lot.type,
            "id":       lot.id,
            "forecast": False,
        })
        i += 5    # next parking-lot

    return data
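The while-loop above walks the flattened cell list in strides of five, one parking lot per stride. An equivalent range()-based formulation, shown only as a sketch of the same traversal (td is the cell list from table[2] above):

# one parking lot every five cells, same bound as the while-loop
for i in range(0, len(td) - 4, 5):
    parking_name = td[i + 1].text.strip()
    # ... identical state/free/total handling as in the example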
Example #40
def parse_html(html):
    # BeautifulSoup is a great and easy way to parse the html and
    # find the bits and pieces we're looking for.
    soup = BeautifulSoup(html, "html.parser")

    # last_updated is the date when the data on the page was last updated
    last_updated = str(soup.select("body"))
    start = str.find(last_updated, "Letzte Aktualisierung:") + 23
    last_updated = last_updated[start:start + 16] + ' Uhr'

    data = {
        # convert_date is a utility function
        # you can use to turn this date into the correct string format
        "last_updated": convert_date(last_updated, "%d.%m.%Y %H:%M Uhr"),
        "lots": []
    }

    status_map = {"Offen": "open", "Geschlossen": "closed"}

    # Oldenburg does not publish the totals on their website,
    # so we take some values from a 2011 PDF:
    # http://www.oldenburg.de/fileadmin/oldenburg/Benutzer/PDF/41/414/Parkplatz_Uebersicht2.pdf
    # and http://gis4oldenburg.oldenburg.de/?es=C12S77
    # what could possibly go wrong ¯\_(ツ)_/¯
    lots_map = {
        "Waffenplatz": [650, "Waffenplatz 3"],
        "City": [440, "Staulinie 10"],
        "Galeria Kaufhof": [326, "Ritterstraße"],
        "Pferdemarkt": [401, "Pferdemarkt 13"],
        # CCO 1 & 2 are together only known together with 420,
        # but they seem to be somewhat like this
        "CCO Parkdeck 1": [190, "Heiligengeiststraße 4"],
        "CCO Parkdeck 2": [230, "Heiligengeiststraße 4"],
        "Hbf/ZOB": [358, "Karlstraße"],
        "Theaterwall": [125, "Theaterwall 4"],
        "Theatergarage": [107, "Roonstraße"],
        "Heiligengeist-Höfe": [275, "Georgstraße"],
        "Schlosshöfe": [430, "Mühlenstraße"],
    }

    for tr in soup.find_all("tr"):
        if tr.td is None:
            continue
        td = tr.find_all('td')
        lot_name = td[0].b.string
        lot_free = int(td[1].b.text)

        # get the values from the map above; an unknown lot name
        # should throw an exception -> [email protected]
        lot_total = lots_map[lot_name][0]
        lot_address = lots_map[lot_name][1]

        # lot_type = tr.find("td").text

        # please be careful: the state is only allowed to contain
        # "open", "closed" or "nodata"; should the page list other
        # states, please map them onto these three possibilities
        state = status_map.get(td[3].text, "nodata")

        lot = geodata.lot(lot_name)
        data["lots"].append({
            "id": lot.id,
            "name": lot.name,
            "free": lot_free,
            "state": state,
            "total": lot_total,
            "address": lot_address,
            "coords": lot.coords,
            # "type": lot_type,
            "forecast": False
        })
    return data
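convert_date is a utility shared by all of these scrapers; its implementation is not shown in the listings. Assuming it simply wraps datetime.strptime and renders the result in one canonical format, a minimal sketch could look like this (the real helper may also handle time zone conversion):

from datetime import datetime

def convert_date(date_string, date_format):
    # hypothetical sketch: parse with the given format, emit one
    # canonical string; the real helper may also shift local time to UTC
    return datetime.strptime(date_string, date_format).strftime("%Y-%m-%dT%H:%M:%S")

# e.g. convert_date("05.06.2019 14:40 Uhr", "%d.%m.%Y %H:%M Uhr")
# -> '2019-06-05T14:40:00'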
Example #41
def parse_html(html):
    if geodata.private_data:
        api_data = json.loads(html)
        dt = time.strptime(api_data[0]["timestamp"].split(".")[0], "%Y-%m-%dT%H:%M:%S")
        ts = time.gmtime(time.mktime(dt))
        data = {
            "lots": [],
            "last_updated": time.strftime("%Y-%m-%dT%H:%M:%S", ts)
        }
        status = ['open', 'closed', 'unknown']
        # map the source's lot ids (stored in the aux field) to geodata lots
        id_lots = {geodata.lots[n].aux: geodata.lots[n] for n in geodata.lots}
        for dataset in api_data:
            try:
                lot = id_lots[dataset['id']]
                forecast = os.path.isfile("forecast_data/" + lot.id + ".csv")
                data["lots"].append({
                    "coords": lot.coords,
                    "name": lot.name,
                    "total": lot.total,
                    "free": max(lot.total - dataset["belegung"], 0),
                    "state": status[dataset["status"] - 1],
                    "id": lot.id,
                    "lot_type": lot.type,
                    "address": lot.address,
                    "forecast": forecast,
                    "region": ""
                })
            except KeyError:
                pass
    else:
        # no private API data available, so fall back to scraping the website
        soup = BeautifulSoup(html, "html.parser")
        date_field = soup.find(id="P1_LAST_UPDATE").text
        last_updated = convert_date(date_field, "%d.%m.%Y %H:%M:%S")
        data = {
            "lots": [],
            "last_updated": last_updated
        }

        for table in soup.find_all("table"):
            if table["summary"] != "":
                region = table["summary"]
                if region == "Busparkplätze":
                    continue

                for lot_row in table.find_all("tr"):
                    if lot_row.find("th") is not None:
                        continue

                    cls = lot_row.find("div")["class"]
                    state = "nodata"
                    if "green" in cls or "yellow" in cls or "red" in cls:
                        state = "open"
                    elif "park-closed" in cls:
                        state = "closed"

                    lot_name = lot_row.find("td", {"headers": "BEZEICHNUNG"}).text

                    try:
                        col = lot_row.find("td", {"headers": "FREI"})
                        free = int(col.text)
                    except ValueError:
                        free = 0

                    try:
                        col = lot_row.find("td", {"headers": "KAPAZITAET"})
                        total = int(col.text)
                    except ValueError:
                        total = get_most_lots_from_known_data("Dresden", lot_name)

                    lot = geodata.lot(lot_name)
                    forecast = os.path.isfile("forecast_data/" + lot.id + ".csv")

                    data["lots"].append({
                        "coords": lot.coords,
                        "name": lot_name,
                        "total": total,
                        "free": free,
                        "state": state,
                        "id": lot.id,
                        "lot_type": lot.type,
                        "address": lot.address,
                        "forecast": forecast,
                        "region": region
                    })
    return data
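The API branch above parses the source's local timestamp and round-trips it through mktime/gmtime to express it as UTC. A standalone demonstration of that conversion, with a made-up timestamp:

import time

dt = time.strptime("2019-06-05T14:40:20", "%Y-%m-%dT%H:%M:%S")
ts = time.gmtime(time.mktime(dt))  # local struct_time -> UTC struct_time
# prints the same instant shifted to UTC; the offset depends on
# the machine's local time zone
print(time.strftime("%Y-%m-%dT%H:%M:%S", ts))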
Example #42
def parse_html(html):

    # BeautifulSoup is a great and easy way to parse the html and find the bits and pieces we're looking for.
    soup = BeautifulSoup(html, "html.parser")

    # last_updated is the date when the data on the page was last updated, it should be listed on most pages
    # search for:  <td width="233">
    date_time_text = soup.find('td', width='233').text.strip()

    data = {
        # convert_date is a utility function you can use to turn this
        # date into the correct string format, e.g.
        # 'Stand vom 05.06.2019, 14:40:20'
        "last_updated": convert_date(date_time_text,
                                     'Stand vom %d.%m.%Y, %H:%M:%S'),
        "lots": []
    }

    # everything is in table-objects
    # so we have to go down several levels of table-objects
    html_level0 = soup.find('table')
    html_level1 = html_level0.find_all('table')
    html_level2 = html_level1[1].find_all('table')
    html_level3 = html_level2[0].find_all('table')
    html_level4 = html_level3[2].find_all('table')
    # here we have the data of the tables
    #   [0]: header
    #   [1]: empty
    #   all following: empty or Parkhaus
    for html_parkhaus in html_level4[2:]:
        if html_parkhaus.text.strip() == '':
            continue  # table is empty
        html_parkhaus_all_rows = html_parkhaus.find_all('tr')
        for html_parkhaus_row in html_parkhaus_all_rows:
            # one row: one parkhaus
            html_parkhaus_data = html_parkhaus_row.find_all('td')
            # collapse any whitespace inside the name to single spaces
            parking_name = ' '.join(html_parkhaus_data[1].text.split())

            lot = geodata.lot(parking_name)
            parking_state = 'open'
            parking_free = 0
            try:
                parking_free = int(html_parkhaus_data[2].text)
            except (ValueError, IndexError):
                parking_state = 'nodata'

            data["lots"].append({
                "name": parking_name,
                "free": parking_free,
                "total": lot.total,
                "address": lot.address,
                "coords": lot.coords,
                "state": parking_state,
                "lot_type": lot.type,
                "id": lot.id,
                "forecast": False,
            })

    return data
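All of these parse_html variants share one contract: raw page text in, a dict with last_updated and lots out. A hypothetical invocation (placeholder URL, and the assumption that pages are fetched with requests):

import requests

html = requests.get("https://example.com/parkleitsystem").text  # placeholder URL
snapshot = parse_html(html)
print(snapshot["last_updated"], len(snapshot["lots"]))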