def run():
    """Build the Somerset mobile library stops file from the raw CSV.

    Reads DATA_SOURCE (the header row is skipped), looks each stop's postcode
    up on postcodes.io to obtain its coordinates, and passes the assembled
    rows to create_mobile_library_file.

    Raises:
        KeyError: if the postcode lookup response has no 'result' entry.
    """
    timetable = 'https://www.somerset.gov.uk/libraries-leisure-and-communities/libraries/library-facilities/mobile-library/'
    mobile_library = 'Mobile'
    frequency = 'FREQ=WEEKLY;INTERVAL=4'
    mobiles = []

    with open(DATA_SOURCE, 'r') as som_raw:
        mobreader = csv.reader(som_raw, delimiter=',', quotechar='"')
        next(mobreader, None)  # skip the headers

        for row in mobreader:
            route = row[0].strip()
            community = row[1].strip()
            stop_name = row[2].strip()
            address = stop_name + ', ' + community
            postcode = row[3].strip()
            # Any '.' in the source times is swapped for ':'.
            arrival = row[4].strip().replace('.', ':')
            departure = row[5].strip().replace('.', ':')
            day = row[6].strip()
            start = row[7].strip()

            # Geocode the stop from its postcode; the lookup URL must be
            # built for every row.
            url = 'https://api.postcodes.io/postcodes/' + postcode
            postcode_request = requests.get(url)
            postcode_data = json.loads(postcode_request.text)
            latitude = postcode_data['result']['latitude']
            longitude = postcode_data['result']['longitude']

            mobiles.append([
                mobile_library, route, community, stop_name, address,
                postcode, longitude, latitude, day, 'Public', arrival,
                departure, frequency, start, '', '', timetable
            ])

    create_mobile_library_file('Somerset', 'somerset.csv', mobiles)
def run():
    """Convert the Portsmouth raw CSV into the standard stops file.

    The header row of DATA_SOURCE is skipped. Every column is stripped of
    surrounding whitespace, the first column is used as both route and day,
    coordinates come straight from the file, and the postcode is left blank.
    """
    timetable_url = 'https://www.portsmouth.gov.uk/ext/libraries/mobile-library'
    weekly = 'FREQ=WEEKLY;INTERVAL=1'
    stops = []

    with open(DATA_SOURCE, 'r') as source_file:
        records = csv.reader(source_file, delimiter=',', quotechar='"')
        next(records, None)  # header row

        for record in records:
            cells = [cell.strip() for cell in record]

            day_name = cells[0]
            arrive, depart = cells[1], cells[2]
            stop, place = cells[3], cells[4]
            lat, lon = cells[5], cells[6]
            first_visit = cells[7]

            stops.append([
                'Mobile', day_name, place, stop, stop + ', ' + place, '',
                lon, lat, day_name, 'Public', arrive, depart, weekly,
                first_visit, '', '', timetable_url
            ])

    create_mobile_library_file('Portsmouth', 'portsmouth.csv', stops)
def run():
    """Create the Milton Keynes mobile library stops file.

    Each data row of DATA_SOURCE (the header row is skipped) is geocoded by
    postcode through the postcodes.io API and added to the list handed to
    create_mobile_library_file. The day column is also used as the route.
    """
    timetable_url = 'https://www.milton-keynes.gov.uk/libraries/about-libraries/mobile-library-service'
    weekly = 'FREQ=WEEKLY;INTERVAL=1'
    lookup_base = 'https://api.postcodes.io/postcodes/'
    stops = []

    with open(DATA_SOURCE, 'r') as source_file:
        records = csv.reader(source_file, delimiter=',', quotechar='"')
        next(records, None)  # header row

        for record in records:
            day = record[0]
            arrive = record[1]
            depart = record[2]
            place = record[3]
            stop = record[4]
            postcode = record[5]
            first_visit = record[6]

            # Resolve the postcode to a latitude/longitude pair.
            response = requests.get(lookup_base + postcode)
            location = json.loads(response.text)['result']
            lat = location['latitude']
            lon = location['longitude']

            stops.append([
                'Mobile', day, place, stop, stop + ', ' + place, postcode,
                lon, lat, day, 'Public', arrive, depart, weekly,
                first_visit, '', '', timetable_url
            ])

    create_mobile_library_file('Milton Keynes', 'milton_keynes.csv', stops)
def run():
    """Create the North Yorkshire 'Supermobile' stops file from the raw CSV.

    The header row of DATA_SOURCE is skipped. Coordinates are read directly
    from the file, the postcode is left blank, and the day column is also
    used as the route.
    """
    timetable_url = 'https://www.northyorks.gov.uk/supermobile-library'
    fortnightly = 'FREQ=WEEKLY;INTERVAL=2'
    stops = []

    with open(DATA_SOURCE, 'r') as source_file:
        records = csv.reader(source_file, delimiter=',', quotechar='"')
        next(records, None)  # header row

        for record in records:
            day = record[0]
            place, stop = record[1], record[2]
            arrive, depart = record[3], record[4]
            first_visit = record[5]
            lat, lon = record[6], record[7]

            stops.append([
                'Supermobile', day, place, stop, stop + ', ' + place, '',
                lon, lat, day, 'Public', arrive, depart, fortnightly,
                first_visit, '', '', timetable_url
            ])

    create_mobile_library_file('North Yorkshire', 'north_yorkshire.csv', stops)
def run():
    """Create the Birmingham stops file from the raw CSV.

    Every row of DATA_SOURCE is treated as a stop (no header row is skipped).
    Frequency, start date and route are taken from the file itself.
    """
    timetable_url = 'https://www.birmingham.gov.uk/info/50163/library_services/1479/mobile_library_service/3'

    def to_stop(record):
        """Map one CSV record onto the standard stop row layout."""
        day, arrive, depart = record[0], record[1], record[2]
        place, stop = record[3], record[4]
        lat, lon = record[5], record[6]
        recurrence, first_visit, route = record[7], record[8], record[9]
        return [
            'Mobile', route, place, stop, stop + ', ' + place, '', lon, lat,
            day, 'Public', arrive, depart, recurrence, first_visit, '', '',
            timetable_url
        ]

    with open(DATA_SOURCE, 'r') as source_file:
        records = csv.reader(source_file, delimiter=',', quotechar='"')
        stops = [to_stop(record) for record in records]

    create_mobile_library_file('Birmingham', 'birmingham.csv', stops)
def run():
    """Create the Norfolk stops file from the two feature sources.

    Features from DATA_SOURCE_1 and DATA_SOURCE_2 are combined, ordered by
    stop number and de-duplicated (the last of any identical features is
    kept). Community, next visit date and times are extracted from each
    feature's DETAILS text with DETAILS_RE; the start date is the next visit
    date minus `four_weeks`.
    """
    organisation = 'Norfolk'
    frequency = 'FREQ=WEEKLY;INTERVAL=4'

    combined = read_data(DATA_SOURCE_1) + read_data(DATA_SOURCE_2)
    combined.sort(key=lambda item: item['attributes']['STOP_NUMBE'])

    # Drop a feature when an identical one appears later in the list.
    unique = []
    for position, candidate in enumerate(combined):
        if candidate in combined[position + 1:]:
            continue
        unique.append(candidate)

    stops = []
    for feature in unique:
        attributes = feature['attributes']
        route = attributes['ROUTE_NUMB']
        stop_name = attributes['STOP_NAME'].title()
        details = attributes['DETAILS']
        longitude = feature['geometry']['x']
        latitude = feature['geometry']['y']

        # The first three characters of the route identify the vehicle, the
        # next three the route number. 'CEN' routes get a suffix looked up
        # in CEN_Routes.
        vehicle = route[0:3]
        route_number = route[3:6]
        if vehicle == 'CEN':
            vehicle = 'CEN' + CEN_Routes[route]

        # DETAILS example: "Mobile Library Route Number EDE305 will next
        # visit NORTH ELMHAM, ORCHARD CLOSE No.20 on 18/09/2019 Arrival
        # 16:10 Departure 16:25"
        parsed = re.search(DETAILS_RE, details)
        community = parsed.group(2).title()
        visit = datetime.strptime(parsed.group(4), '%d/%m/%Y') - four_weeks
        arrival = parsed.group(5)[0:5]  # keep the first five characters
        departure = parsed.group(6)[0:5]

        stops.append([
            vehicle, route, community, stop_name,
            stop_name + ', ' + community, '', longitude, latitude,
            visit.strftime('%A'), 'Public', arrival, departure, frequency,
            visit.strftime('%Y-%m-%d'), '', '',
            TIMETABLE + vehicle + '-' + route_number + '.pdf'
        ])

    create_mobile_library_file(organisation, 'norfolk.csv', stops)
def run():
    """Create the Bath and North East Somerset stops file.

    Features are read with read_data(DATA_SOURCE). Each feature's first
    coordinate pair is converted from EPSG:27700 to EPSG:4326. Features whose
    village is 'LIBRARY DEPOT' are processed but not written out.
    """
    organisation = 'Bath and North East Somerset'
    vehicle = 'Mobile'
    fortnightly = 'FREQ=WEEKLY;INTERVAL=2'
    # First visit date for each route.
    route_starts = {
        "Route 1": "2019-10-14",
        "Route 2": "2019-10-15",
        "Route 3": "2019-10-16",
        "Route 4": "2019-10-17",
        "Route 5": "2019-10-18",
        "Route 6": "2019-10-21",
        "Route 7": "2019-10-22",
        "Route 8": "2019-10-23",
        "Route 9": "2019-10-24"
    }

    stops = []
    for feature in read_data(DATA_SOURCE):
        fields = feature['properties']['fields']

        route = fields['day_number'].replace('Day', 'Route')
        stop_name = fields['stop']
        community = fields['village']
        if stop_name == '':
            # No stop given: fall back to the village name.
            stop_name = community
        address = stop_name + ', ' + community
        day = fields['day']

        # The time field holds "arrival - departure"; dots become colons.
        time_range = fields['time'].split('-')
        arrival = time_range[0].strip().replace('.', ':')
        departure = time_range[-1].strip().replace('.', ':')

        is_depot = community == 'LIBRARY DEPOT'
        start = '' if is_depot else route_starts[route]

        first_vertex = feature['geometry']['coordinates'][0]
        easting = first_vertex[0]
        northing = first_vertex[1]
        projected = geopandas.GeoSeries([Point(float(easting), float(northing))])
        projected.crs = {'init': 'epsg:27700'}
        projected = projected.to_crs(epsg=4326)
        longitude = str(projected[0].x)
        latitude = str(projected[0].y)

        if not is_depot:
            stops.append([
                vehicle, route, community, stop_name, address, '', longitude,
                latitude, day, 'Public', arrival, departure, fortnightly,
                start, '', '', TIMETABLE
            ])

    create_mobile_library_file(organisation, 'bath_and_north_east_somerset.csv', stops)
def run():
    """Create the North Lincolnshire stops file from the GeoJSON source.

    The start date for every stop is looked up by route id in a fixed table.
    Route 12 repeats weekly; all other routes repeat every three weeks. The
    'addr' property is used for both the stop name and the address.
    """
    timetable_url = 'https://www.northlincs.gov.uk/schools-libraries-and-learning/libraries/the-mobile-library/'
    # First visit date (day/month/year) for each route id.
    route_starts = {
        12: '8/01/2019',
        11: '21/01/2019',
        13: '23/01/2019',
        14: '24/01/2019',
        15: '25/01/2019',
        16: '26/01/2019',
        21: '28/01/2019',
        23: '9/01/2019',
        24: '10/01/2019',
        26: '12/01/2019',
        31: '14/01/2019',
        33: '16/01/2019',
        34: '17/01/2019',
        35: '18/01/2019',
        36: '19/01/2019'
    }

    with open(DATA_SOURCE) as data_file:
        features = json.load(data_file)['features']

    stops = []
    for feature in features:
        properties = feature['properties']
        route_id = properties['id']
        if route_id == 12:
            recurrence = 'FREQ=WEEKLY;INTERVAL=1'
        else:
            recurrence = 'FREQ=WEEKLY;INTERVAL=3'

        day = properties['day']
        location = properties['addr']
        arrival = properties['time1'][:5]  # first five characters only
        departure = properties['time2'][:5]
        community = properties['name']
        position = feature['geometry']['coordinates']
        longitude = position[0]
        latitude = position[1]
        first_visit = datetime.strptime(route_starts[route_id], '%d/%m/%Y')

        stops.append([
            'Mobile', 'Route ' + str(route_id), community, location,
            location, '', longitude, latitude, day, 'Public', arrival,
            departure, recurrence, first_visit.strftime('%Y-%m-%d'), '', '',
            timetable_url
        ])

    create_mobile_library_file(
        'North Lincolnshire', 'north_lincolnshire.csv', stops)
def run():
    """Create the Perth and Kinross stops file from the KML source.

    Each KML Folder is one mobile library and each Placemark inside it one
    stop. The placemark description is split on '<br>': the first part holds
    the times, the second the route, and any part containing '-Mar' supplies
    the start date (year fixed to 2019) and hence the day.
    """
    ns = {'kml': 'http://www.opengis.net/kml/2.2'}
    fortnightly = 'FREQ=WEEKLY;INTERVAL=2'
    timetable_url = 'https://www.culturepk.org.uk/libraries/services-in-the-community/mobile-library-service/'

    document = ET.parse(DATA_SOURCE).getroot().find('kml:Document', ns)

    stops = []
    for folder in document.findall('kml:Folder', ns):
        vehicle = folder.find('kml:name', ns).text

        for placemark in folder.findall('kml:Placemark', ns):
            # Placemark name has the form "stop - ... - community".
            address = placemark.find('kml:name', ns).text
            name_parts = address.split(' - ')
            stop_name = name_parts[0]
            community = name_parts[-1]

            description = placemark.find('kml:description', ns).text.strip()
            sections = description.split('<br>')
            time_match = re.search(r'(\d{2}:\d{2}).*(\d{2}:\d{2})', sections[0])
            arrival = time_match.group(1)
            departure = time_match.group(2)
            route = sections[1]

            for section in sections:
                if '-Mar' not in section:
                    continue
                visit = datetime.strptime(
                    section.split(',')[1].strip() + ' 2019', '%d-%b %Y')
                start = visit.strftime('%Y-%m-%d')
                day = visit.strftime('%A')

            # KML coordinates are "longitude,latitude[,...]".
            lon_lat = placemark.find('kml:Point', ns).find(
                'kml:coordinates', ns).text.strip().split(',')
            longitude = lon_lat[0]
            latitude = lon_lat[1]

            stops.append([
                vehicle, route, community, stop_name, address, '', longitude,
                latitude, day, 'Public', arrival, departure, fortnightly,
                start, '', '', timetable_url
            ])

    create_mobile_library_file(
        'Perth and Kinross', 'perth_and_kinross.csv', stops)
def run():
    """Create the North Somerset and South Gloucestershire stops files.

    Both authorities share one raw CSV. Stops on the 'Thursday 1' route are
    written to the South Gloucestershire file and everything else to the
    North Somerset file. Eastings/northings are converted from EPSG:27700 to
    EPSG:4326.
    """
    north_somerset = []
    south_glos = []
    north_somerset_url = 'https://www.n-somerset.gov.uk/my-services/leisure/libraries/bringing-the-library-to-you/mobile-library/'
    south_glos_url = 'https://www.southglos.gov.uk/libraries/mobile-library-almondsbury-severn-beach-pilning/'
    vehicle = 'Mobile'
    fortnightly = 'FREQ=WEEKLY;INTERVAL=2'

    with open(DATA_SOURCE, 'r') as source_file:
        records = csv.reader(source_file, delimiter=',', quotechar='"')
        next(records, None)  # header row

        for record in records:
            route = record[0].strip()
            community = record[1].strip()
            stop_name = record[2].strip()
            day = record[4].strip()
            arrival = record[5].strip()
            departure = record[6].strip()
            start = record[7].strip()
            easting = float(record[8].strip())
            northing = float(record[9].strip())

            projected = geopandas.GeoSeries([Point(easting, northing)])
            projected.crs = {'init': 'epsg:27700'}
            projected = projected.to_crs({'init': 'epsg:4326'})
            longitude = str(projected[0].x)
            latitude = str(projected[0].y)

            # Pick the output list and timetable link by route.
            if route == 'Thursday 1':
                target, timetable_url = south_glos, south_glos_url
            else:
                target, timetable_url = north_somerset, north_somerset_url

            target.append([
                vehicle, route, community, stop_name,
                stop_name + ', ' + community, '', longitude, latitude, day,
                'Public', arrival, departure, fortnightly, start, '', '',
                timetable_url
            ])

    create_mobile_library_file('North Somerset', 'north_somerset.csv', north_somerset)
    create_mobile_library_file('South Gloucestershire', 'south_gloucestershire.csv', south_glos)
def run():
    """Create the West Berkshire stops file.

    Features are ordered by their ROUTE attribute. The start date is the
    first day number following 'Oct: ' in the DATES attribute (year fixed to
    2019), and the day written out is the weekday of that date.
    """
    vehicle = 'Mobile'
    organisation = 'West Berkshire'
    three_weekly = 'FREQ=WEEKLY;INTERVAL=3'

    ordered = sorted(
        read_data(DATA_SOURCE), key=lambda item: item['attributes']['ROUTE'])

    stops = []
    for feature in ordered:
        attributes = feature['attributes']

        route = attributes['ROUTE'][0:1]  # first character only
        time_match = re.search(
            r'(\d{2}.\d{2}).*(\d{2}.\d{2})', attributes['TIME'])
        arrival = time_match.group(1).replace('.', ':')
        departure = time_match.group(2).replace('.', ':')

        # The DAY attribute is read, but the weekday derived from the start
        # date below is what ends up in the output.
        day = attributes['DAY']
        community = attributes['VILLAGE']
        stop_name = attributes['STOP']
        address = attributes['NAME']
        date_text = attributes['DATES']
        longitude = feature['geometry']['x']
        latitude = feature['geometry']['y']

        october_day = re.search(r'Oct: (\d{1,2})', date_text).group(1)
        first_visit = datetime.strptime(october_day + ' Oct 2019', '%d %b %Y')
        start = first_visit.strftime('%Y-%m-%d')
        day = first_visit.strftime('%A')

        stops.append([
            vehicle, route, community, stop_name, address, '', longitude,
            latitude, day, 'Public', arrival, departure, three_weekly, start,
            '', '', TIMETABLE
        ])

    create_mobile_library_file(organisation, 'west_berkshire.csv', stops)
def run():
    """Create the York stops file.

    The source only provides an address and a position for each stop, so the
    ADDRESS attribute is used for community, stop name and address alike.
    Route, day, times and start date are written as empty strings.
    """
    organisation = 'York'
    fortnightly = 'FREQ=WEEKLY;INTERVAL=2'

    ordered = sorted(
        read_data(DATA_SOURCE), key=lambda item: item['attributes']['ADDRESS'])

    stops = []
    for feature in ordered:
        location = feature['attributes']['ADDRESS']
        geometry = feature['geometry']
        stops.append([
            'Mobile', '', location, location, location, '', geometry['x'],
            geometry['y'], '', 'Public', '', '', fortnightly, '', '', '',
            TIMETABLE
        ])

    create_mobile_library_file(organisation, 'york.csv', stops)
def run():
    """Create the Shropshire stops file.

    Features are ordered by route and then stop. The Place attribute is split
    on ';': the first part is the community and the last part the stop name.
    Date1 is a millisecond timestamp, read as UTC, that gives the start date
    and day.
    """
    fortnightly = 'FREQ=WEEKLY;INTERVAL=2'

    def tidy(text):
        """Title-case a place name, keeping possessive 's lower case."""
        return text.strip().title().replace("'S", "'s")

    ordered = sorted(
        read_data(DATA_SOURCE),
        key=lambda item: (item['attributes']['Route'], item['attributes']['Stop']))

    stops = []
    for feature in ordered:
        attributes = feature['attributes']
        route = attributes['Route']

        place_parts = attributes['Place'].split(';')
        stop_name = tidy(place_parts[-1])
        community = tidy(place_parts[0])

        first_visit = datetime.utcfromtimestamp(int(attributes['Date1']) / 1000)
        start = first_visit.strftime('%Y-%m-%d')
        day = first_visit.strftime('%A')

        arrival = attributes['From_']
        departure = attributes['To']
        vehicle = attributes['Fullname'].replace(' Mobile Library', '')
        longitude = feature['geometry']['x']
        latitude = feature['geometry']['y']

        stops.append([
            vehicle, route, community, stop_name,
            stop_name + ', ' + community, '', longitude, latitude, day,
            'Public', arrival, departure, fortnightly, start, '', '',
            TIMETABLE
        ])

    create_mobile_library_file('Shropshire', 'shropshire.csv', stops)
def run():
    """Create the Aberdeenshire stops file from the nested JSON source.

    The source is a list of positional arrays, one per mobile library. The
    vehicle name is entry 2 and its stops sit at [12][0][13][0]. For a stop,
    [1][0][0] holds the position, [5][0][1][0] the name and [5][3] a list of
    (key, [value]) attribute pairs.
    """
    organisation = 'Aberdeenshire'
    stop_type = 'Public'
    stops_out = []

    with open(DATA_SOURCE, encoding='utf-8') as data_file:
        vehicles = json.load(data_file)

    for vehicle in vehicles:
        vehicle_name = vehicle[2].replace(
            'Mobile', '').replace('Library', '').strip()

        # Timetable PDF depends on which area the vehicle name mentions.
        timetable_url = 'https://www.livelifeaberdeenshire.org.uk/media/'
        if "North" in vehicle_name:
            timetable_url += '2808/mobile-north-timetable-160119.pdf'
        if "Central" in vehicle_name:
            timetable_url += '2807/mobile-central-timetable-160119.pdf'
        if "South" in vehicle_name:
            timetable_url += '2809/mobile-south-timetable-160119.pdf'

        for stop in vehicle[12][0][13][0]:
            position = stop[1][0][0]
            coord_y = position[0]
            coord_x = position[1]
            stop_name = stop[5][0][1][0].replace('\n', '').strip()

            day = start = arrival = departure = week = date_list = ''
            for attribute in stop[5][3]:
                key = attribute[0]
                value = attribute[1][0].replace('\n', '').strip()
                if key == 'Day':
                    day = value
                elif key == 'Arrival':
                    arrival = value
                elif key == 'Departure':
                    departure = value
                elif key == 'Week':
                    week = value.replace(' & ', '/').replace('Weeks ', '')
                elif key == 'Dates':
                    date_list = value

            if date_list != '':
                # Only the first listed date is used, e.g. "January 22"
                # becomes 2019-01-22.
                first_visit = datetime.strptime(
                    date_list.split(',')[0] + ' 2019', '%B %d %Y')
                start = first_visit.strftime('%Y-%m-%d')

            route = vehicle_name + ' ' + day + ' ' + week

            # The stop name has the form "community - stop".
            name_parts = stop_name.split(' - ')
            community = name_parts[0]
            if len(name_parts) > 1:
                stop_name = name_parts[1]

            stops_out.append([
                vehicle_name, route, community, stop_name,
                stop_name + ', ' + community, '', coord_x, coord_y, day,
                stop_type, arrival, departure, 'FREQ=WEEKLY;INTERVAL=2',
                start, '', '', timetable_url
            ])

    create_mobile_library_file(organisation, 'aberdeenshire.csv', stops_out)
def run():
    """Scrape the Hillingdon mobile library stops and write the stops file.

    The stop list page carries a '|'-separated block of stops, each a
    '~'-separated record with position, road, day, address and a link to the
    stop's own page. That page's details table is then read to refine the
    fields and to obtain arrival and departure times.

    Raises:
        AttributeError: if the stop list block or a stop's details table is
            missing from the fetched page.
    """
    mobiles = []
    mobile_library = 'Mobile'

    # A single web call listing stops
    stop_list_html = requests.get(WEBSITE)
    stop_list_soup = BeautifulSoup(stop_list_html.text, 'html.parser')

    # The first two '|'-separated items are not stop records, so skip them.
    stops = stop_list_soup.find(
        'div', {'class': 'LBH_MapItems'}).text.split('|')[2:]

    for stop in stops:
        stop_data = stop.split('~')
        latitude = stop_data[1]
        longitude = stop_data[2]
        road = stop_data[5]
        day = stop_data[8]
        address = stop_data[7].lstrip(';').replace(';;', ', ')
        stop_name = address.split(', ')[0]
        community = stop_name
        town = community
        timetable = 'https://archive.hillingdon.gov.uk' + stop_data[9]

        # Reset the times for every stop so that a details table without
        # Arrive/Depart rows yields blanks, not the previous stop's times
        # (or a NameError on the very first stop).
        arrival = ''
        departure = ''

        # Now use the stop URL to get the rest of the data (cleaner)
        stop_html = requests.get(timetable)
        stop_soup = BeautifulSoup(stop_html.text, 'html.parser')
        stop_table = stop_soup.find('table', {'class': 'LBH_Table'})

        for row in stop_table.find_all('tr'):
            header = row.find('th')
            cells = row.find_all('td')
            if header is None or not cells:
                # Not a label/value row; nothing to read from it.
                continue
            label = header.text
            value = cells[0].text.strip()

            # Independent checks: a label may match more than one keyword.
            if 'Road' in label:
                road = value
            if 'Location' in label:
                stop_name = value
            if 'Town' in label:
                town = value
            if 'Area' in label:
                community = value
            if 'Day' in label:
                day = value
            if 'Arrive' in label:
                arrival = value
            if 'Depart' in label:
                departure = value

        address = stop_name + ', ' + road + ', ' + community + ', ' + town
        route = day  # will need to manually correct routes after
        start = ''  # will manually set start dates later

        mobiles.append([
            mobile_library, route, community, stop_name, address, '',
            longitude, latitude, day, 'Public', arrival, departure,
            'FREQ=WEEKLY', start, '', '', timetable
        ])

    create_mobile_library_file('Hillingdon', 'hillingdon.csv', mobiles)
def run():
    """Create the Cambridgeshire stops file from the raw stops CSV.

    Stops recur monthly on one or two numbered weekdays (the week column is
    either "N..." or "N... & M..."). That becomes a FREQ=MONTHLY;BYDAY rule,
    and the start date is the first matching date on or after today.
    Eastings/northings are converted from EPSG:27700 to EPSG:4326. Identical
    rows are de-duplicated, keeping the last occurrence.
    """
    timetable_url = 'https://www.cambridgeshire.gov.uk/residents/libraries-leisure-&-culture/libraries/mobiles/find-a-mobile-library-stop/'
    # Day name -> [dateutil weekday constant, two-letter rule code].
    day_codes = {
        'Monday': [MO, 'MO'],
        'Tuesday': [TU, 'TU'],
        'Wednesday': [WE, 'WE'],
        'Thursday': [TH, 'TH'],
        'Friday': [FR, 'FR'],
        'Saturday': [SA, 'SA'],
        'Sunday': [SU, 'SU']
    }

    stops = []
    with open(DATA_SOURCE_STOPS, 'r') as source_file:
        records = csv.reader(source_file, delimiter=',', quotechar='"')
        next(records, None)  # header row

        for record in records:
            vehicle = record[0].strip()
            route = record[1].strip()
            week_text = record[2].strip()
            day = record[3].strip()
            community = record[5].strip()
            stop_name = record[6].strip()
            postcode = record[7].strip()
            easting = record[9].strip()
            northing = record[10].strip()
            arrival = record[11][0:5].strip()
            departure = record[12][0:5].strip()

            # The week number is the first character of each week entry.
            if '&' in week_text:
                week_parts = week_text.split('&')
                positions = (int(week_parts[0].strip()[:1]),
                             int(week_parts[1].strip()[:1]))
                by_day = ','.join(
                    str(position) + day_codes[day][1] for position in positions)
            else:
                positions = int(week_text[:1])
                by_day = str(positions) + day_codes[day][1]
            repeat_rule = 'FREQ=MONTHLY;BYDAY=' + by_day

            first_visit = rrule(
                freq=MONTHLY, dtstart=date.today(), bysetpos=positions,
                byweekday=day_codes[day][0], count=1)[0]
            start = first_visit.strftime('%Y-%m-%d')

            projected = geopandas.GeoSeries(
                [Point(float(easting), float(northing))])
            projected.crs = {'init': 'epsg:27700'}
            projected = projected.to_crs(epsg=4326)
            longitude = str(projected[0].x)
            latitude = str(projected[0].y)

            stops.append([
                vehicle, route, community, stop_name,
                stop_name + ', ' + community, postcode, longitude, latitude,
                day, 'Public', arrival, departure, repeat_rule, start, '', '',
                timetable_url
            ])

    # Keep a row only when no identical row follows it.
    unique_stops = []
    for position, candidate in enumerate(stops):
        if candidate not in stops[position + 1:]:
            unique_stops.append(candidate)

    create_mobile_library_file('Cambridgeshire', 'cambridgeshire.csv', unique_stops)
def _to_24_hour(digits):
    """Turn a scraped run of 1-4 digits into an 'hours:minutes' string.

    One or two digits are whole hours, three digits are H + MM and four are
    HH + MM. Hours below 8 are treated as afternoon and shifted by 12.
    """
    hours = '00'
    minutes = '00'
    if len(digits) in (1, 2):
        hours = digits.rjust(2, '0')
    elif len(digits) == 3:
        hours = digits[0:1].rjust(2, '0')
        minutes = digits[1:3]
    elif len(digits) == 4:
        hours = digits[0:2]
        minutes = digits[2:4]
    if int(hours) < 8:
        hours = str(int(hours) + 12)
    return hours + ':' + minutes


def run():
    """Scrape the Edinburgh mobile library stops and write the stops file.

    Four listing pages are fetched. Every link in the fourth <ul> of a page
    leads to a stop page, whose first table supplies the day and times,
    address and postcode, and whose map marker supplies the position. The
    route is the day name and the start date comes from a fixed table keyed
    by day.

    Raises:
        AttributeError: if a stop page lacks the map marker, or its 'Day'
            row contains no recognisable weekday.
    """
    mobiles = []
    mobile_library = 'Mobile 1'
    pages = [1, 2, 3, 4]
    # First visit date for each weekday.
    dates = {
        "Monday": "2019-05-20",
        "Tuesday": "2019-05-21",
        "Wednesday": "2019-05-22",
        "Thursday": "2019-05-23",
        "Friday": "2019-05-24",
    }
    # Compiled once; raw string so \d is a regex class, not a string escape.
    day_matcher = re.compile('(Mon|Tues|Wed|Thurs|Fri)')
    times_matcher = re.compile(r'\d{1,4}')

    for page in pages:
        # A single web page listing stops
        stop_list_html = requests.get(WEBSITE + DATA_SOURCE + str(page))
        stop_list_soup = BeautifulSoup(stop_list_html.text, 'html.parser')

        # For each stop get the stop details
        # Being lazy here and just taking the 4th list - can refine another time
        for link in stop_list_soup.find_all('ul')[3].find_all('a'):
            stop_name = link.string
            stop_url = link.get('href')
            timetable = WEBSITE + stop_url

            stop_html = requests.get(timetable)
            stop_soup = BeautifulSoup(stop_html.text, 'html.parser')

            # Reset every scraped field per stop so a page with a missing
            # table row cannot inherit the previous stop's value (or raise
            # NameError on the first stop).
            community = ''
            arrival = ''
            departure = ''
            day = ''
            route = ''
            start = ''
            address = ''
            postcode = ''

            # Marker value is "latitude,longitude".
            location = stop_soup.find(
                id='map_marker_location_197').get('value').split(',')
            latitude = location[0]
            longitude = location[1]

            data_table = stop_soup.find_all('table')[0]
            for row in data_table.find_all('tr'):
                row_header = row.find('th').string.strip()

                if 'Day' in row_header:
                    # The cell holds the weekday followed by the times, with
                    # dots removed so that e.g. "2.30" reads as "230".
                    times = row.find('td').contents[0].replace('.', '')
                    day = day_matcher.search(times).group(1) + 'day'
                    if day == 'Wedday':
                        day = 'Wednesday'

                    times_matches = times_matcher.findall(times)
                    if len(times_matches) > 0:
                        arrival = _to_24_hour(times_matches[0])
                    if len(times_matches) > 1:
                        departure = _to_24_hour(times_matches[1])

                    route = day
                    start = dates[day]

                if 'Address' in row_header:
                    address = row.find('td').contents[0].strip()

                if 'Postcode' in row_header:
                    postcode = row.find('td').contents[0].strip()

            mobiles.append([
                mobile_library, route, community, stop_name, address,
                postcode, longitude, latitude, day, 'Public', arrival,
                departure, 'FREQ=WEEKLY', start, '', '', timetable
            ])

    create_mobile_library_file('Edinburgh', 'edinburgh.csv', mobiles)
def run():
    """Scrape the Suffolk mobile library routes and write the stops file.

    The routes page is scanned for 'Mobile Library Route' links. Each route
    page supplies the vehicle name (from its heading), a start date (the
    third entry of the comma-separated 2020 dates paragraph) and a table of
    stops. A stop's position comes from the manual `locations` table when its
    address is listed there; otherwise it is geocoded through NOM_URL,
    first by address and then by community, and only accepted when it falls
    inside BOUNDS and its longitude has not been used already.
    """
    # Imported here so the block is self-contained.
    from urllib.parse import quote

    # manual locations: address -> [latitude, longitude]
    locations = {
        'Glendale, Culford': [52.295329, 0.693698],
        'Village Hall, Risby': [52.265824, 0.634437],
        'Tutelina Road, Great Welnetham': [52.205614, 0.750686],
        'Walnut Tree Cottage, West Stow': [52.334333, 0.521259],
        'The Green, Honington': [52.336852, 0.806826],
        'Bus Shelter, Boxstead Row': [52.139440, 0.677079],
        'Village Hall, Brockley': [52.159501, 0.667900],
        'Top of the Hill, Hartest': [52.135100, 0.687976],
        'Community Hall, Whepstead': [52.193932, 0.681194],
        'Three Ways, Whepstead': [52.194667, 0.673889],
        'Village Hall, Ousden': [52.204576, 0.553058],
        'Church, Ousden': [52.207750, 0.538848],
        'Chequers, Gazely': [52.247857, 0.517684],
        'Needham Hall, Needham Street': [52.263197, 0.520293],
        'The School, Tuddenham St Mary': [52.313586, 0.547267],
        'Opp. 14 Cavenham Road, Tuddenham St Mary': [52.312273, 0.550346],
        'Bishopscroft, Barningham': [52.356090, 0.890150],
        'Honey Pot Lane, Wattisfield': [52.320952, 0.951440],
        'Village Hall, Wattisfield': [52.330484, 0.948689],
        '109 Bury Road, Great Thurlow': [52.126156, 0.456243],
        'Meadow Drive, Horringer': [52.222825, 0.673222],
        'Church, Cotton': [52.261369, 1.032773],
        'Methodist Church, Cotton': [52.264101, 1.019484],
        'Village Hall, Norton': [52.257070, 0.866380],
        'Church Road, Newton Green': [52.036365, 0.796942],
        'Flint Cottages, Smallbridge': [52.173870, 0.794328],
        'Half Moon, Hepworth': [52.335184, 0.913022],
        'Ivy Nook, Beck Street, Hepworth': [52.346656, 0.920684],
        'Stanton Stores, Stanton': [52.323876, 0.884184],
        'Hilltop, Stanton': [52.333412, 0.880192],
        'Community Centre, Walsham Le Willows': [52.302602, 0.935680],
        'Town House Road, Walsham Le Willows': [52.300731, 0.943665],
        'Pumping Station, Cavendish': [52.090559, 0.640543],
        'Church, Cavendish': [52.087629, 0.632805],
        'The Chapel, Lawshall': [52.154713, 0.733109],
        'Swanfields, Lawshall': [52.155029, 0.727303],
        'Village Hall, Lawshall': [52.159793, 0.714417],
        'Village Hall, Buxhall': [52.180829, 0.915938],
        'Green Farm Cottage, Thorpe Morieux': [52.145783, 0.836211],
        'Village Hall, Thorpe Morieux': [52.148876, 0.837022],
        'Primary School, Boxford': [52.027547, 0.859810],
        'Bridge Farm Day Nursery, Martlesham': [52.081405, 1.288248],
        'Falcon Mobile Home Park, Martlesham': [52.064742, 1.281813],
        'Douglas Bader, Martlesham Heath': [52.060970, 1.272164],
        'Village Hall, Bentley': [51.989546, 1.071395],
        'Whissels Farm, Creeting St Mary': [52.177112, 1.085009],
        'Village Hall, Creeting St Mary': [52.172051, 1.070963],
        'Church, Little Stonham': [52.199084, 1.089106],
        'Magpie Inn, Little Stonham': [52.201534, 1.099858],
        'The Green, Higham': [52.260693, 0.557358],
        'Fenn View, Washbrook': [52.034945, 1.078822],
        'Church, Battisford': [52.149227, 1.001806],
        'Battisford Pre School, Battisford': [52.146993, 0.980069],
        'Farnish House, Botesdale': [52.341407, 1.006361],
        'Village Hall, Botesdale': [52.345144, 1.008976],
        'Backhills, Botesdale': [52.345331, 1.006312],
        'Broom Knoll, East Bergholt': [51.971927, 1.061666],
        'South View, East Bergholt': [51.976061, 1.055943],
        'Red Lion, East Bergholt': [51.971688, 1.011434],
        'Paddock Way, Bildeston': [52.105056, 0.910917],
        'White Horse, Hitcham': [52.124151, 0.894430],
        'Village Hall, Hitcham': [52.127769, 0.900729],
        'The Swan car park, Hoxne': [52.349545, 1.199516],
        'St Edmunds House, Hoxne': [52.339660, 1.208437],
        'Fish and Chip shop, Mendlesham': [52.251120, 1.082581],
        'Recreation ground, Mendlesham': [52.248412, 1.078985],
        'The Green, Mendlesham': [52.226707, 1.069991],
        'Lay-by, Wickham Skeith': [52.281808, 1.071562],
        'Swan House, Wickham Skeith': [52.286511, 1.064710],
        'Moorlands, Hollesley': [52.054773, 1.430125],
        'Shepherd & Dog PH, Hollesley': [52.050942, 1.430870],
        'Harewood House, Hollesley': [52.039702, 1.412796],
        'Orchardleigh, North Cove': [52.446345, 1.629062],
        'Old Post Office, Mutford': [52.437027, 1.650539],
        'Hartismere House, Laxfield': [52.300566, 1.363823],
        'New Dawn, Chediston': [52.354000, 1.450951],
        'Midsummer Cottage, Chediston': [52.348039, 1.459990],
        'Nursery, Rendlesham': [52.126116, 1.414609],
        'Village Hall, Wenhaston': [52.323520, 1.558421],
        'Ashburnham Way (Co-op car park), Carlton Colville': [52.457034, 1.703553],
        'Green, Saxtead': [52.231834, 1.299388],
        'Foxearth Nursing Home, Saxtead': [52.247412, 1.297638],
        'Bell Inn, Middleton': [52.255023, 1.557760],
        'Mulberry Bush nursery, Eye': [52.329782, 1.144394],
        'Hartismere House, Eye': [52.322536, 1.139949],
        'Spring Park, Otley': [52.157872, 1.229017],
        'Village Stores, Otley': [52.151381, 1.220365],
        'Village Hall, Snape': [52.170943, 1.500930],
        'Near pond, Lound': [52.547929, 1.682302],
        'Lound Hall, Lound': [52.530358, 1.702772],
        # NOTE(review): identical to 'Lound Hall, Lound' above - looks like a
        # copy/paste slip; confirm the real Waldringfield position.
        'Village Way, Waldringfield': [52.530358, 1.702772]
    }

    mobiles = []
    # Longitudes already handed out by the geocoder.
    coordinates = []

    # BOUNDS is "x_min,y_min,x_max,y_max"; parse it once, not per stop.
    bbox = BOUNDS.split(',')
    x_min, y_min = float(bbox[0]), float(bbox[1])
    x_max, y_max = float(bbox[2]), float(bbox[3])

    route_list_html = requests.get(WEBSITE + ROUTES)
    route_list_soup = BeautifulSoup(route_list_html.text, 'html.parser')

    route_links = []
    for li in route_list_soup.find_all('li'):
        anchor = li.find('a')
        if anchor is None:
            continue
        anchor_title = anchor.get('title')
        if anchor_title is None or 'Mobile Library Route' not in anchor_title:
            continue
        # List item text: first two words name the route, the third is the
        # day.
        words = li.text.strip().split(' ')
        route_links.append({
            'href': anchor.get('href'),
            'day': words[2],
            'route': words[0] + ' ' + words[1]
        })

    for route_link in route_links:
        # A single web page listing stops
        stop_list_html = requests.get(WEBSITE + str(route_link['href']))
        stop_list_soup = BeautifulSoup(stop_list_html.text, 'html.parser')

        route_title = stop_list_soup.find('h1').text.strip()
        mobile_library = route_title.split(' Mobile Library ')[0]

        # The last paragraph mentioning 2020 holds the visit dates.
        dates = []
        for para in stop_list_soup.find_all('p'):
            if '2020' in para.text:
                dates = para.text.strip().split(', ')

        # The third listed date is used, so make sure it exists before
        # indexing it.
        if len(dates) > 2:
            date = datetime.strptime(dates[2] + ' 2020', '%d %B %Y')
            start = date.strftime('%Y-%m-%d')
        else:
            start = ''

        route = mobile_library + ' ' + route_link['route']
        day = route_link['day'][:-1]  # drop the last character of the day
        timetable = WEBSITE + route_link['href']

        # For each stop get the stop details
        for stop in stop_list_soup.find_all('tr')[1:]:
            cells = stop.find_all('td')
            community = cells[1].string.strip()
            stop_name = cells[2].string.strip()
            address = stop_name + ', ' + community
            postcode = ''
            longitude = ''
            latitude = ''

            # Strip everything but digits, then read HHMMHHMM.
            times = re.sub(r'\D', '', cells[3].text)
            arrival = times[:2] + ':' + times[2:4]
            departure = times[4:6] + ':' + times[6:]

            if address in locations:
                latitude, longitude = locations[address]
            else:
                # Geocoding: get the lat/lng. The query text is escaped so
                # characters such as '&' stay part of the search term.
                geo_json = requests.get(
                    NOM_URL + quote(address) + '&viewbox=' + BOUNDS).json()
                if len(geo_json) == 0:
                    geo_json = requests.get(
                        NOM_URL + quote(community) + '&viewbox=' + BOUNDS).json()

                if len(geo_json) > 0:
                    x = round(float(geo_json[0]['lon']), 5)
                    y = round(float(geo_json[0]['lat']), 5)
                    inside = x_min <= x <= x_max and y_min <= y <= y_max
                    if inside and x not in coordinates:
                        # Don't add duplicates - we'll manually sort em out laters
                        coordinates.append(x)
                        longitude = x
                        latitude = y

            mobiles.append([
                mobile_library, route, community, stop_name, address,
                postcode, longitude, latitude, day, 'Public', arrival,
                departure, 'FREQ=WEEKLY;INTERVAL=4', start, '', '', timetable
            ])

            # Pause between stops to keep the request rate low.
            time.sleep(6)

    create_mobile_library_file('Suffolk', 'suffolk.csv', mobiles)
def run():
    """Scrape the Wrexham stop list page and write the stops to wrexham.csv.

    Downloads ``WEBSITE + DATA_SOURCE``, walks each route accordion on the
    page, looks up every stop's postcode on api.postcodes.io for its
    coordinates and passes the collected rows to
    ``create_mobile_library_file``.
    """
    timetable_url = WEBSITE + DATA_SOURCE
    vehicle = 'Mobile 1'
    recurrence = 'FREQ=WEEKLY;INTERVAL=3'
    # Postcodes that are swapped for a replacement before the lookup.
    postcode_swaps = {'SY13 0GB': 'LL13 0GB', 'LL11 5GS': 'LL11 5SY'}
    rows = []

    def clock(hours_text, minutes_text):
        """Join captured hour and minute text as 'H:MM'."""
        hours = hours_text.strip()
        # Hours below 8 get 12 added (presumably 12-hour afternoon times).
        if int(hours) < 8:
            hours = int(hours) + 12
        minutes = minutes_text.strip()
        if minutes == '':
            minutes = '00'
        return str(hours) + ':' + minutes

    # A single web page lists every route and stop.
    page = requests.get(timetable_url)
    soup = BeautifulSoup(page.text, 'lxml')

    postcode_pattern = re.compile(POSTCODE_RE)
    details_pattern = re.compile(DATA_RE)

    for accordion in soup.find_all(
            'div', {"class": "field-group-accordion-wrapper"}):
        heading = accordion.find('h4').string
        route_name = heading.split('(')[0].strip()
        day = heading.split('(')[1].split(',')[1].replace(')', '').strip()
        stop_list = accordion.find_all('ol')[0]

        # The first date in the list is the start date; 'Jan 3' is the
        # fallback when that list item has no plain string.
        first_date = accordion.find_all('ul')[0].find_all('li')[0].string
        start_text = first_date.strip() if first_date else 'Jan 3'
        if 'and' in start_text:
            start_text = start_text.split('and')[0]
        start = datetime.strptime(
            start_text + ' 2019', '%b %d %Y').strftime('%Y-%m-%d')

        for item in stop_list.find_all('li'):
            text = item.string

            # Pull the postcode out of the stop text, with manual fallbacks.
            found = postcode_pattern.search(text)
            postcode = found.group(1) if found else ''
            if postcode == '':
                if 'Brymbo' in text:
                    postcode = 'LL11 5AG'
                elif 'Cynddelw School' in text:
                    postcode = 'LL20 7HH'
            postcode = postcode_swaps.get(postcode, postcode)

            # Geocode through the postcode lookup service.
            lookup = requests.get(
                'https://api.postcodes.io/postcodes/' + postcode)
            result = json.loads(lookup.text)['result']
            latitude = result['latitude']
            longitude = result['longitude']

            # Strip the postcode and normalise separators before parsing.
            cleaned = text.replace(postcode, '').replace(
                '.', ':').replace('–', '-')
            parsed = details_pattern.search(cleaned)
            community = parsed.group(1).strip()
            stop_name = parsed.group(2).strip()
            arrival = clock(parsed.group(3), parsed.group(4))
            departure = clock(parsed.group(5), parsed.group(6))

            rows.append([
                vehicle, route_name, community, stop_name,
                stop_name + ', ' + community, postcode, longitude, latitude,
                day, 'Public', arrival, departure, recurrence, start, '', '',
                timetable_url
            ])

    create_mobile_library_file('Wrexham', 'wrexham.csv', rows)
def _fetch_cached(url, pause=0):
    """Return the HTML for ``url``, caching it under ``../raw/essex/``.

    The cache file name is the URL-safe base64 encoding of the URL. When the
    file is missing the page is downloaded, saved, and ``pause`` seconds are
    slept before returning. Files are opened with context managers so the
    handles are always closed.
    """
    encoded = str(base64.urlsafe_b64encode(url.encode("utf-8")), 'utf-8')
    cache_file = '../raw/essex/' + encoded + '.txt'
    if path.exists(cache_file):
        with open(cache_file, 'r') as cached:
            return cached.read()
    text = requests.get(url).text
    # save the data out as web scraping seems to be getting blocked so may
    # take a few goes
    with open(cache_file, "w") as cached:
        cached.write(text)
    if pause:
        time.sleep(pause)
    return text


def _clock_from_text(text):
    """Turn scraped time text such as '9.30am' or '2pm' into 'H:MM'.

    The am/pm markers and dots are dropped and the remaining one to four
    characters are read as hours, or hours followed by two minute digits.
    Anything else falls back to '00' hours and minutes. Hours below 8 have
    12 added.
    """
    digits = text.replace('am', '').replace('pm', '').strip().replace('.', '')
    hours = '00'
    mins = '00'
    if len(digits) == 1:
        hours = digits.rjust(2, '0')
    elif len(digits) == 2:
        hours = digits
    elif len(digits) == 3:
        hours = digits[0:1].rjust(2, '0')
        mins = digits[1:3]
    elif len(digits) == 4:
        hours = digits[0:2]
        mins = digits[2:4]
    if int(hours) < 8:
        hours = int(hours) + 12
    return str(hours) + ':' + mins


def run():
    """Scrape the Essex mobile library stops and write them to essex.csv.

    Collects the route links from ``WEBSITE + ROUTES_LIST``, follows every
    stop link in the first table of each route page, reads the stop details
    from the ``pfont`` blocks, geocodes the postcode on api.postcodes.io and
    hands the rows to ``create_mobile_library_file``. Route and stop pages
    are cached on disk so an interrupted run can be resumed.
    """
    mobiles = []

    route_list_html = requests.get(WEBSITE + ROUTES_LIST)
    route_list_soup = BeautifulSoup(route_list_html.text, 'html.parser')

    route_links = []
    for link in route_list_soup.find_all('a'):
        href = link.get('href')
        # Anchors without an href give None, which cannot be searched.
        if href and 'mobile-library-route' in href:
            route_links.append(href)

    for link in route_links:
        # Only live route fetches are followed by a 10 second pause.
        route_soup = BeautifulSoup(_fetch_cached(link, pause=10),
                                   'html.parser')

        stop_links = [
            WEBSITE + stop_link.get('href')
            for stop_link in route_soup.find_all('table')[0].find_all('a')
        ]

        for stop in stop_links:
            stop_soup = BeautifulSoup(_fetch_cached(stop), 'html.parser')

            values = stop_soup.find_all('div', {"class": "pfont"})
            stop_name = stop_soup.find_all(
                'div', {"class": "yellow-wrapper"})[0].find("h1").text
            location_lines = values[0].text.strip().splitlines()
            community = location_lines[0].strip()
            address = stop_name + ', ' + community
            postcode = location_lines[-1].strip()
            # Manual replacements applied before the lookup.
            if postcode == 'CM133AS':
                postcode = 'CM132AS'
            if postcode == 'RM4 1ED':
                postcode = 'RM4 1LU'

            # Only the first character of the frequency text is used.
            frequency = 'FREQ=WEEKLY;INTERVAL=' + values[1].text.strip()[:1]
            day = values[2].text.strip()
            times = values[3].text.strip()
            route_mobile = values[4].text.strip()
            route = 'Week ' + route_mobile.split('week')[1].strip() + ' ' + day
            mobile_library = route_mobile.split('week')[0].strip()
            start = datetime.strptime(
                values[6].text.strip(), '%d %B %Y').strftime('%Y-%m-%d')

            time_parts = times.split('to')
            arrival = _clock_from_text(time_parts[0])
            departure = _clock_from_text(time_parts[1])

            url = 'https://api.postcodes.io/postcodes/' + postcode
            postcode_request = requests.get(url)
            postcode_data = json.loads(postcode_request.text)
            latitude = postcode_data['result']['latitude']
            longitude = postcode_data['result']['longitude']

            mobiles.append(
                [mobile_library, route, community, stop_name, address,
                 postcode, longitude, latitude, day, 'Public', arrival,
                 departure, frequency, start, '', '', stop]
            )

    create_mobile_library_file('Essex', 'essex.csv', mobiles)
def run():
    """Convert the Midlothian KML stop file into midlothian.csv.

    Each KML folder is a route whose name carries the day and week. Every
    placemark in a folder becomes one row; folders marked with '&' are
    emitted twice, once for week 1 and once for week 2.
    """
    kml = {'kml': 'http://www.opengis.net/kml/2.2'}
    # Start date for each week number and day name.
    week_starts = {
        1: {
            "Monday": "2020-02-24",
            "Tuesday": "2020-02-25",
            "Wednesday": "2020-02-26",
            "Thursday": "2020-02-27",
            "Friday": "2020-02-28"
        },
        2: {
            "Monday": "2020-03-02",
            "Tuesday": "2020-03-03",
            "Wednesday": "2020-03-04",
            "Thursday": "2020-03-05",
            "Friday": "2020-03-06"
        }
    }
    document = ET.parse(DATA_SOURCE).getroot().find('kml:Document', kml)
    rows = []
    timetable = 'https://www.midlothian.gov.uk/info/427/libraries/446/mobile_library'
    vehicle = 'Mobile'
    recurrence = 'FREQ=WEEKLY;INTERVAL=2'

    for folder in document.findall('kml:Folder', kml):
        route_name = folder.find('kml:name', kml).text.replace(' Stops', '')

        # Monday unless a later day name appears; the last match wins.
        day = 'Monday'
        for candidate in ('Tuesday', 'Wednesday', 'Thursday', 'Friday'):
            if candidate in route_name:
                day = candidate

        week = 1 if 'Week 1' in route_name else 2
        # An '&' in the name means the stops are also on the week 2 route.
        both_weeks = '&' in route_name
        if both_weeks:
            route_name = route_name.replace('Week 1 & 2', 'Week 1')
        start = week_starts[week][day]

        for placemark in folder.findall('kml:Placemark', kml):
            place = placemark.find('kml:name', kml).text
            address = place
            stop_name = place
            community = place

            description = placemark.find('kml:description', kml).text.strip()
            if '<br' in description:
                detail = description.split('<br')[0].strip()
                # A first line without 'day' in it is used as the stop name.
                if 'day' not in detail:
                    stop_name = detail
                    address = stop_name + ', ' + community

            found = re.search(r'(\d{1,2}:\d{2}).*?(\d{1,2}:\d{2})',
                              description.replace('.', ':'))
            arrival = found.group(1)
            departure = found.group(2)

            point = placemark.find('kml:Point', kml)
            lng_lat = point.find('kml:coordinates', kml).text.strip().split(',')
            longitude = lng_lat[0]
            latitude = lng_lat[1]

            visits = [(route_name, start)]
            if both_weeks:
                visits.append((route_name.replace('Week 1', 'Week 2'),
                               week_starts[2][day]))
            for visit_route, visit_start in visits:
                rows.append([
                    vehicle, visit_route, community, stop_name, address, '',
                    longitude, latitude, day, 'Public', arrival, departure,
                    recurrence, visit_start, '', '', timetable
                ])

    create_mobile_library_file('Midlothian', 'midlothian.csv', rows)
def _venue_field(venue, data_id):
    """Return the 'content' div of the venue field with the given data-id."""
    return venue.find('div', {
        "data-id": data_id
    }).find('div', {
        "class": "content"
    })


def _opening_clock(raw):
    """Normalise a matched opening time to 'H:MM'.

    Dots become colons. A bare one or two character value gets ':00'
    appended, a single minute digit is padded with a trailing zero, and a
    longer value without a separator (e.g. '1030') is split before its last
    two digits instead of raising IndexError.
    """
    clock = raw.replace('.', ':')
    if ':' not in clock:
        if len(clock) < 3:
            return clock + ':00'
        return clock[:-2] + ':' + clock[-2:]
    if len(clock.split(':')[1]) < 2:
        clock = clock + '0'
    return clock


def run():
    """Extract the East Riding mobile library stops from the saved HTML page.

    Reads the file at ``HTML``, keeps the venues whose type is
    'Library_All/Library_Mobile Library', parses the last opening-times
    entry containing '2019' for the times, start date and interval, and
    hands the rows to ``create_mobile_library_file``. Routes are numbered in
    the order their start dates are first seen.
    """
    mobiles = []
    mobile_library = 'Mobile'
    route_names = {}
    route_number = 0

    # Parse inside a context manager so the file handle is closed.
    with open(HTML) as html_file:
        stop_soup = BeautifulSoup(html_file, "html.parser")

    for venue in stop_soup.find_all('div',
                                    {"class": "er-filter-block-wrapper"}):
        venue_type = _venue_field(venue, "venue_type").get_text()
        if venue_type != 'Library_All/Library_Mobile Library':
            continue

        stop_name = _venue_field(venue, "name").find('a').get_text().replace(
            ' Mobile Library', '')
        community = stop_name
        postcode = _venue_field(venue, "postcode").get_text()
        address = stop_name + ', ' + postcode
        latitude = _venue_field(venue, "latitude").get_text()
        longitude = _venue_field(venue, "longitude").get_text()
        timetable = 'https://www.eastridinglibraries.co.uk' + \
            _venue_field(venue, "learn_more").find('a').get('href').strip()

        # Entries are ';' separated; the last one mentioning 2019 is used.
        openings = _venue_field(venue, "opening_times").get_text().split(';')
        opening_times = ''
        for entry in openings:
            if '2019' in entry:
                opening_times = entry

        times_result = re.search(
            r'(\d{1,2}\.{0,1}\d{1,2})-(\d{1,2}\.{0,1}\d{1,2})',
            opening_times)
        arrival = _opening_clock(times_result.group(1))
        departure = _opening_clock(times_result.group(2))

        date_result = re.search(r'(2019\d{4})', opening_times)
        start = datetime.strptime(date_result.group(1), '%Y%m%d')
        day = start.strftime('%A')
        start = start.strftime('%Y-%m-%d')

        # The digit right after the date is used as the weekly interval.
        frequency_result = re.search(r'2019\d{4}(\d)', opening_times)
        frequency = 'FREQ=WEEKLY;INTERVAL=' + frequency_result.group(1)

        if not route_names.get(start):
            route_number = route_number + 1
            route_names[start] = route_number
        route = route_names[start]

        mobiles.append([
            mobile_library, route, community, stop_name, address, postcode,
            longitude, latitude, day, 'Public', arrival, departure,
            frequency, start, '', '', timetable
        ])

    create_mobile_library_file('East Riding of Yorkshire',
                               'east_riding_of_yorkshire.csv', mobiles)
def run():
    """Build wiltshire.csv from the Wiltshire mobile library stop pages.

    Works in two phases. If ``DATA_OUTPUT_RAW`` does not exist yet, the stop
    list page and every stop page it links to are scraped and saved to that
    raw CSV. The raw CSV is then read back, each address is given
    coordinates (from ``locations`` when listed there, otherwise from the
    geocoder at ``NOM_URL``) and the rows go to
    ``create_mobile_library_file``.
    """
    # If we don't already have it, create the raw file. The stop list is
    # only downloaded here, so a cached run makes no scraping requests.
    if not os.path.isfile(DATA_OUTPUT_RAW):
        # Scrape stop information. This is a single web page listing stops
        stop_list = 'https://services.wiltshire.gov.uk/MobileLibrary/Library/StopList'
        stop_list_html = requests.get(stop_list)
        stop_list_soup = BeautifulSoup(stop_list_html.text, 'html.parser')

        raw_rows = []
        # For each stop get the stop details
        for link in stop_list_soup.find_all('a'):
            href = link.get('href')
            # Skip anchors with no href and links that are not stop pages
            if not href or '/MobileLibrary/Library/Stop/' not in href:
                continue

            # Get the webpage
            stop_url = 'https://services.wiltshire.gov.uk' + href
            stop_html = requests.get(stop_url)
            stop_soup = BeautifulSoup(stop_html.text, 'html.parser')

            # General stop information: heading is 'Community, Stop name'
            heading = stop_soup.find('h2').text.strip().split(', ')
            community = heading[0].strip()
            stop_name = heading[1].replace(' (fortnightly stop)', '').strip()
            address = stop_name + ', ' + community

            # There are some stops that are two weekly but they're part of
            # separate routes. Keep them separate
            frequency = 4

            # Detailed information for the stop is found in the table.
            table = stop_soup.find('table').find('tbody')
            for stop in table.find_all('tr'):
                round_link = stop.find('a')
                round_name = round_link.text.replace(
                    '\r\n', '').replace(' (fortnightly stop)', '')
                round_parts = round_name.split(', ')
                mobile_library = round_parts[0].replace(' mobile library', '')
                day_week = round_parts[1]
                route = day_week.replace('week', 'Week')
                day = day_week.split(' week ')[0]
                date = datetime.strptime(
                    stop.find('li').text, '%A %d %B, %Y')
                cells = stop.find_all('td')
                timetable = 'http://services.wiltshire.gov.uk' + \
                    round_link.get('href')

                # Same column order as the header written below
                raw_rows.append([
                    mobile_library, route, community, stop_name, address,
                    date.strftime('%Y-%m-%d'), day, frequency,
                    cells[1].text, cells[2].text, timetable
                ])

            # Pause between stop page requests
            time.sleep(1)

        with open(DATA_OUTPUT_RAW, 'w', encoding='utf8', newline='') as out_raw:
            mob_writer = csv.writer(
                out_raw, delimiter=',', quotechar='"',
                quoting=csv.QUOTE_MINIMAL)
            mob_writer.writerow(['Mobile', 'Route', 'Community', 'Stop',
                                 'Address', 'Date', 'Day', 'Frequency',
                                 'Start', 'End', 'Timetable'])
            mob_writer.writerows(raw_rows)

    mobiles = []
    coordinates = []
    bbox = BOUNDS.split(',')

    with open(DATA_OUTPUT_RAW, 'r', encoding='utf8', newline='') as raw:
        mobreader = csv.reader(raw, delimiter=',', quotechar='"')
        next(mobreader, None)  # skip the headers

        # Mobile,Route,Community,Stop,Address,Date,Day,Frequency,Start,End,Timetable
        for row in mobreader:
            longitude = ''
            latitude = ''

            if row[4] not in locations:
                # Geocoding: try the full address, then just the community
                geo_json = requests.get(
                    NOM_URL + row[4] + '&viewbox=' + BOUNDS).json()
                if len(geo_json) == 0:
                    geo_json = requests.get(
                        NOM_URL + row[2] + '&viewbox=' + BOUNDS).json()

                if len(geo_json) > 0:
                    x = round(float(geo_json[0]['lon']), 5)
                    y = round(float(geo_json[0]['lat']), 5)
                    in_bounds = (float(bbox[0]) <= x <= float(bbox[2])
                                 and float(bbox[1]) <= y <= float(bbox[3]))
                    # Don't add duplicates - they are left blank to be
                    # sorted out manually later
                    if in_bounds and x not in coordinates:
                        coordinates.append(x)
                        longitude = x
                        latitude = y
            else:
                # locations entries are indexed [latitude, longitude]
                longitude = locations[row[4]][1]
                latitude = locations[row[4]][0]

            mobiles.append([
                row[0], row[1], row[2], row[3], row[4], '', longitude,
                latitude, row[6], 'Public', row[8], row[9],
                'FREQ=WEEKLY;INTERVAL=4', row[5], '', '', row[10]
            ])

    create_mobile_library_file('Wiltshire', 'wiltshire.csv', mobiles)
def run():
    """Scrape the Worcestershire A-Z stop pages into worcestershire.csv.

    Follows every letter link on ``WEBSITE + A_Z_PAGE``, then every
    community timetable link on each letter page. The timetable page gives
    the monthly schedule (e.g. '2nd Tuesday'), a map marker location and the
    stop lines; each line mentioning the community becomes one row.
    """
    schedule_pattern = re.compile(r'(\d)(st|nd|rd|th) (.*day)')
    times_pattern = re.compile(r'(\d{1,2}:\d{2}).*?(\d{1,2}:\d{2})(.*)')
    vehicle = 'Mobile'
    rows = []

    # Get the A-Z links
    index_page = requests.get(WEBSITE + A_Z_PAGE)
    index_soup = BeautifulSoup(index_page.text, 'html.parser')
    letter_list = index_soup.find(
        'ul', {'class': 'item-list item-list__inline a-to-z'})

    for letter_link in letter_list.find_all('a'):
        # A single web page listing stops for the alphabet letter
        letter_page = requests.get(WEBSITE + letter_link.get('href'))
        letter_soup = BeautifulSoup(letter_page.text, 'html.parser')
        community_links = letter_soup.find_all(
            'ul', {'class': 'item-list'})[2].find_all('a')

        for link in community_links:
            community = link.text.replace(' Mobile Library Timetable', '')
            stop_url = link.get('href')
            stop_page = requests.get(WEBSITE + stop_url)
            stop_soup = BeautifulSoup(stop_page.text, 'html.parser')

            # The first table cell holds the schedule text.
            record = stop_soup.find(
                'table', {'class': 'data-table directory-record'})
            schedule = schedule_pattern.search(record.find_all('td')[0].text)
            week = int(schedule.group(1))
            day = schedule.group(3)

            # First occurrence from today's date under the monthly rule.
            first_visit = rrule(freq=MONTHLY,
                                dtstart=date.today(),
                                bysetpos=week,
                                byweekday=day_codes[day][0],
                                count=1)[0]
            start = first_visit.strftime('%Y-%m-%d')
            repeat_rule = 'FREQ=MONTHLY;BYDAY=' + str(week) + day_codes[day][1]
            route = day + ' Week ' + str(week)

            # The marker value is split as 'latitude,longitude'.
            marker = stop_soup.find(
                id='map_marker_location_10798').get('value')
            marker_parts = marker.split(',')
            longitude = marker_parts[1]
            latitude = marker_parts[0]
            if community == 'Longdon':
                longitude, latitude = '-2.239241', '52.023780'

            if community == 'Sytchampton':
                lines = ['11:05 to 11:20 - Sytchampton, Brakeshill']
            else:
                editor = stop_soup.find('div', {'class': 'editor'})
                lines = [
                    para.text.replace('\r\n', ' ')
                    for para in editor.find_all('p')
                ]
                # With fewer than 5 paragraphs, use the raw text lines.
                if len(lines) < 5:
                    lines = editor.text.splitlines()

            for line in lines:
                if community not in line:
                    continue
                found = times_pattern.search(line.replace('.', ':'))
                arrival = found.group(1)
                departure = found.group(2)
                stop_name = found.group(3).replace(community, '').replace(
                    ',', '').replace('-', '').replace('–', '').strip()
                if stop_name == '':
                    stop_name = community
                rows.append([
                    vehicle, route, community, stop_name,
                    stop_name + ', ' + community, '', longitude, latitude,
                    day, 'Public', arrival, departure, repeat_rule, start,
                    '', '', WEBSITE + stop_url
                ])

    create_mobile_library_file('Worcestershire', 'worcestershire.csv', rows)
def _clock_from_digits(digits):
    """Format a run of one to four time digits as 'H:MM'.

    One or two digits are hours only; three or four are hours followed by
    two minute digits. Hours below 8 have 12 added.
    """
    hours = '00'
    mins = '00'
    if len(digits) == 1:
        hours = digits.rjust(2, '0')
    elif len(digits) == 2:
        hours = digits
    elif len(digits) == 3:
        hours = digits[0:1].rjust(2, '0')
        mins = digits[1:3]
    elif len(digits) == 4:
        hours = digits[0:2]
        mins = digits[2:4]
    if int(hours) < 8:
        hours = int(hours) + 12
    return str(hours) + ':' + mins


def run():
    """Convert the West Dunbartonshire KML stop file into a CSV.

    Each KML folder name is split on 'morning'/'afternoon' into the route
    (a day name, used to look up the start date) and the community. Every
    placemark in the folder becomes one row, with times read from the text
    after 'morning'/'afternoon' in its description.
    """
    dates = {
        "Monday": "2019-04-08",
        "Tuesday": "2019-04-09",
        "Wednesday": "2019-04-10",
        "Thursday": "2019-04-11",
        "Friday": "2019-04-12",
        "Saturday": "2019-04-13"
    }  # add more as needed
    namespaces = {'xmlns': 'http://www.opengis.net/kml/2.2'}
    kml_tree = ET.parse(DATA_SOURCE)
    root = kml_tree.getroot()

    organisation = 'West Dunbartonshire'
    mobile = 'Mobile'
    timetable = 'https://www.west-dunbarton.gov.uk/libraries/mobile-housebound-services/mobile-library-service/mobile-library-timetable/'
    mobiles = []

    # Compiled once; raw strings avoid the invalid '\d' escape warning.
    period_splitter = re.compile(r'(?i)morning|afternoon')
    times_matcher = re.compile(r'\d{1,4}')

    document = root.find('xmlns:Document', namespaces)
    for folder in document.findall('xmlns:Folder', namespaces):
        folder_name = folder.find('xmlns:name', namespaces).text
        sections = period_splitter.split(folder_name)
        route_name = sections[0].strip()
        community = sections[1].replace('-', '').strip()
        start = dates[route_name]
        day = route_name

        for stop in folder.findall('xmlns:Placemark', namespaces):
            stop_name = stop.find('xmlns:name', namespaces).text
            address = stop_name + ', ' + community

            # Coordinates are split as 'x,y' and written in that order.
            coords = stop.find('xmlns:Point', namespaces).find(
                'xmlns:coordinates', namespaces).text.split(',')
            geox = coords[0].strip()
            geoy = coords[1].strip()

            description = stop.find('xmlns:description', namespaces).text
            description_sections = period_splitter.split(description)
            times = description_sections[1].strip().replace(
                '.', '').replace(':', '')
            times_matches = times_matcher.findall(times)

            # Reset per stop so a stop with missing times never reuses the
            # previous stop's values (or hits an undefined name).
            arrival = ''
            departure = ''
            if len(times_matches) > 0:
                arrival = _clock_from_digits(times_matches[0])
            if len(times_matches) > 1:
                departure = _clock_from_digits(times_matches[1])

            mobiles.append(
                [mobile, route_name, community, stop_name, address, '',
                 geox, geoy, day, 'Public', arrival, departure,
                 'FREQ=WEEKLY;INTERVAL=2', start, '', '', timetable]
            )

    create_mobile_library_file(
        organisation, 'west_dunbartonshire.csv', mobiles)
def run():
    """Convert the Angus GeoJSON stop file into angus.csv.

    Loads the features from ``DATA_SOURCE``, builds one row per feature
    from its properties, reprojects the grid_x/grid_y point from EPSG:27700
    to EPSG:4326 for the longitude and latitude, and hands the rows to
    ``create_mobile_library_file``.
    """
    timetable = 'https://www.angusalive.scot/media/1708/mobile20library20timetable20-20new20service.pdf'
    # Start date for each week number and day name.
    week_starts = {
        1: {
            "Monday": "2019-03-04",
            "Tuesday": "2019-03-05",
            "Wednesday": "2019-03-06",
            "Thursday": "2019-03-07",
            "Friday": "2019-03-08",
        },
        2: {
            "Monday": "2019-03-11",
            "Tuesday": "2019-03-12",
            "Wednesday": "2019-03-13",
            "Thursday": "2019-03-14",
            "Friday": "2019-03-15",
        }
    }
    rows = []

    with open(DATA_SOURCE) as data_file:
        features = json.load(data_file)['features']

    for feature in features:
        props = feature['properties']
        easting = props['grid_x']
        northing = props['grid_y']
        vehicle = props['vehicle'].title()
        # Trailing 's' characters are stripped from the day value.
        day = props['day'].rstrip('s')

        # A location written 'Community: Stop' is split into its two parts;
        # otherwise the community stays blank.
        community = ''
        stop_name = props['location'].title()
        address = stop_name.title()
        pieces = stop_name.split(': ')
        if len(pieces) > 1:
            community = pieces[0].title()
            stop_name = pieces[1].title()
            address = stop_name + ', ' + community

        arrival = props['time_arrive'].replace(':00Z', '')
        departure = props['time_depart'].replace(':00Z', '')
        week = props['week']
        route = vehicle + ' ' + 'Week ' + str(week) + ' ' + day
        start = week_starts[week][day]

        # Reproject the grid reference to longitude/latitude.
        grid_point = geopandas.GeoSeries([Point(easting, northing)])
        grid_point.crs = {'init': 'epsg:27700'}
        lng_lat = grid_point.to_crs({'init': 'epsg:4326'})
        longitude = str(lng_lat[0].x)
        latitude = str(lng_lat[0].y)

        rows.append([
            vehicle, route, community, stop_name, address, '', longitude,
            latitude, day, 'Public', arrival, departure,
            'FREQ=WEEKLY;INTERVAL=2', start, '', '', timetable
        ])

    create_mobile_library_file('Angus', 'angus.csv', rows)