def parse_special_events_html(self, special_events_html):
    """Parse the campus special-events page HTML into a response dict.

    Args:
        special_events_html: Raw HTML of the special-events page, or a
            falsy value when nothing was scraped.

    Returns:
        dict with a single key "specialEvents" mapping to a list of
        event dicts. On any parse failure the list is empty: unlike
        availability, parse errors here are logged and swallowed so
        availability data stays useful without event data.
    """
    special_events = {"specialEvents": []}
    if not special_events_html:
        return special_events
    try:
        soup = BeautifulSoup(special_events_html)
        event_tables = soup.findAll("table", {"class": "event-item"})
        for event_table in event_tables:
            rows = event_table.findAll("tr")
            # Row 0 (header): "<date>: <event name>". Missing rows raise
            # IndexError, handled by the outer except (whole list dropped).
            header_content = rows[0].find("th").string
            header_array = header_content.split(":")
            event_date = header_array[0]
            event_name = header_array[1].replace(" ", "")

            # Row 1 (time): e.g. "7:30 p.m." -> "7:30PM" for strptime.
            cell_content = rows[1].findAll("td")[1].string
            event_time_arr = cell_content.split(" ", 2)
            event_time = event_time_arr[0] + event_time_arr[1].replace(".", "").upper()

            # Row 2 (lots): comma-separated lot numbers; strip leading 0's.
            cell_content = rows[2].findAll("td")[1].string
            lot_num_array = [
                self.strip_leading_zeros_from_short_name(item)
                for item in cell_content.replace(" ", "").split(",")
            ]

            # The most brittle part of the parse: combine date + time.
            if len(event_time) == 1:
                event_time = "0" + event_time
            event_datetime_tmp = event_date + " " + event_time
            try:
                event_datetime_str = datetime.datetime.strptime(
                    event_datetime_tmp, "%m/%d/%Y %I:%M%p"
                ).strftime("%Y-%m-%dT%H:%M:%S")
            except ValueError:
                logging.error("Error parsing campus special event date")
                event_datetime_str = None

            # Rather than exclude props not currently available via uw lots,
            # going with None. This manifests in the JSON as "property":null,
            # which is easily detectable via client JS.
            special_event = {
                "eventName": event_name,
                "parkingLocations": lot_num_array,
                "eventDatetime": event_datetime_str,
                "parkingStartDatetime": None,
                "parkingEndDatetime": None,
                "eventVenue": None,
                "webUrl": self.parking_data["special_events_url"],
            }
            special_events["specialEvents"].append(special_event)
    except (ValueError, AttributeError, TypeError, IndexError) as e:
        # Unlike availability, we eat this error; availability is still
        # useful w/out events.
        logging.error(
            "Error parsing scraped content from campus special events page. %s", e
        )
        special_events["specialEvents"] = []
    return special_events
def parse_special_events_html(self, special_events_html):
    """Build the special-events payload from scraped page HTML.

    Returns a dict of the form {'specialEvents': [...]}; the list is
    empty when the input is falsy or when parsing fails (parse errors
    are deliberately logged and swallowed).
    """
    result = {'specialEvents': []}
    if not special_events_html:
        return result
    try:
        page = BeautifulSoup(special_events_html)
        for table in page.findAll('table', {'class': 'event-item'}):
            table_rows = table.findAll('tr')

            # Header row: "date: event name".
            header_bits = table_rows[0].find('th').string.split(':')
            when = header_bits[0]
            title = header_bits[1].replace(' ', '')

            # Time row: e.g. "7:30 p.m." becomes "7:30PM".
            time_cell = table_rows[1].findAll('td')[1].string
            pieces = time_cell.split(' ', 2)
            clock = pieces[0] + pieces[1].replace('.', '').upper()

            # Lot row: comma-separated lot short names, leading zeros stripped.
            lots_cell = table_rows[2].findAll('td')[1].string
            lots = [self.strip_leading_zeros_from_short_name(lot)
                    for lot in lots_cell.replace(' ', '').split(',')]

            try:
                # the most brittle part of the parse
                if len(clock) == 1:
                    clock = '0' + clock
                stamp = datetime.datetime.strptime(
                    when + ' ' + clock, '%m/%d/%Y %I:%M%p')
                when_iso = stamp.strftime('%Y-%m-%dT%H:%M:%S')
            except ValueError:
                logging.error('Error parsing campus special event date')
                when_iso = None

            # Rather than exclude props not currently available via uw lots,
            # going with "None" -- shows up as "property":null in the json,
            # which is easily detectable via client JS.
            result['specialEvents'].append({
                'eventName': title,
                'parkingLocations': lots,
                'eventDatetime': when_iso,
                'parkingStartDatetime': None,
                'parkingEndDatetime': None,
                'eventVenue': None,
                'webUrl': self.parking_data['special_events_url']
            })
    except (ValueError, AttributeError, TypeError, IndexError) as e:
        # unlike availability, we eat this error. availability is still
        # useful w/out events
        logging.error(
            'Error parsing scraped content from campus special events page.'
            + str(e))
        result['specialEvents'] = []
    return result