def update_classes(force_update=False):
    '''scrape the classutil index page and refresh every listed department

    Unless force_update is true, nothing is done when wait_until_updated()
    returns a falsy value. Every department row that has a link for
    CURRENT_TERM is handed to scrape_dept(); departments that are in the db
    but were not found that way are deleted.

    Raises a requests exception if the index page cannot be fetched or comes
    back with an error status, in which case nothing is deleted.
    '''
    if not force_update and not wait_until_updated():
        return

    # remove from this as we find departments, eventually only the
    # departments that are no longer listed will be left
    depts_to_delete = {
        dept.dept_id: dept for dept in db_session.query(Dept).all()}

    # without a timeout a stalled connection would hang the scraper forever
    r = requests.get('http://classutil.unsw.edu.au/', timeout=30)
    # stop on an error response: an error page that happens to parse but has
    # no department rows would otherwise look like "every department is gone"
    # and wipe them all from the db
    r.raise_for_status()
    main_page = BeautifulSoup(r.text, 'html.parser')

    # loop through all the departments on the main page
    for row in main_page.find_all('table')[1].find_all('tr'):
        data = row.find_all('td')
        if not data:
            # a row without cells can't describe a department
            continue
        # a cell without a class attribute is treated as unclassified
        # instead of raising
        cell_class = (data[0].get('class') or [''])[0]
        if cell_class != 'data':
            # 'cutabhead' rows describe the campus of the below departments,
            # nothing to do for those (or for any other kind of row)
            continue

        # row describes a department: the first four cells carry the links
        # (indexed by CURRENT_TERM), the remaining two the id and the name
        links = [d.a['href'] if d.a is not None else None for d in data[:4]]
        dept_id, name = (d.get_text() for d in data[4:])
        link = links[CURRENT_TERM]
        # check if the department runs in the current semester
        if link is not None:
            depts_to_delete.pop(dept_id, None)
            scrape_dept(dept_id, name, link)

    for dept in depts_to_delete.values():
        db_session.delete(dept)
    db_session.commit()
def save_alerts():
    '''create an alert for each class chosen in the posted JSON form

    The body is expected to be a JSON object holding one of the contact
    fields 'email', 'phonenumber' or 'yoname' (checked in that order) and a
    'classids' list. Renders error.html with a message when anything is
    missing or invalid, otherwise stores one Alert per matching class,
    commits, and renders alert.html as the success page.
    '''
    # get info from the form
    # if something is invalid or they haven't given a contact or chosen classes
    # just show an error page because they've gotten past the javascript error
    # handling somehow and repopulating the chosen classes list would be super
    # annoying
    # I guess it's still TODO worthy (I'll probably never do it though)
    post_data = request.get_json()
    # the body is untrusted: a JSON value that isn't a non-empty object (a
    # list, a bare string, a number...) can't be the form and has no .get()
    if not post_data or not isinstance(post_data, dict):
        return render_template('error.html', error='Something went wrong')

    # JSON strings are `unicode` on python 2 and `str` on python 3, accept
    # whichever text types this interpreter has
    text_types = (type(''), type(u''))

    email = post_data.get('email')
    phonenumber = post_data.get('phonenumber')
    yoname = post_data.get('yoname')
    if email:
        contact = email
        contact_type = CONTACT_TYPE_EMAIL
        # a non-text value (number, list...) is invalid and would make
        # re.match raise
        if (not isinstance(contact, text_types) or
                not re.match(r'^[^@]+@[^@]+\.[^@]+$', contact)):
            return render_template('error.html',
                                   error='Please enter a valid email address')
    elif phonenumber:
        contact_type = CONTACT_TYPE_SMS
        # drop everything but digits and '+' before validating; a non-text
        # value becomes '' which fails the check below
        if isinstance(phonenumber, text_types):
            contact = re.sub(r'[^0-9+]', '', phonenumber)
        else:
            contact = ''
        if not re.match(r'^(04|\+?614)\d{8}$', contact):
            return render_template('error.html',
                                   error='Please enter a valid Australian '
                                         'phone number')
    elif yoname:
        contact_type = CONTACT_TYPE_YO
        contact = yoname.upper() if isinstance(yoname, text_types) else ''
        # \Z instead of $ so a name with a trailing newline is rejected
        # rather than passed on to is_valid_yo_name
        if (not re.match(r'^\w+\Z', contact) or
                not is_valid_yo_name(contact)):
            return render_template('error.html',
                                   error='Please enter a valid YO username')
    else:
        return render_template('error.html',
                               error='Please enter some contact info before '
                                     'submitting.')

    # get course info from db
    # only integer ids are passed to the query; anything else (nested
    # values, free text, booleans, fractions) is dropped so it can't make the
    # query fail
    raw_ids = post_data.get('classids', [])
    klass_ids = []
    if isinstance(raw_ids, list):
        for raw_id in raw_ids:
            if isinstance(raw_id, (bool, float)):
                continue
            try:
                klass_ids.append(int(raw_id))
            except (TypeError, ValueError, OverflowError):
                continue

    klasses = []
    if klass_ids:
        klasses = db_session.query(Klass).filter(
            Klass.klass_id.in_(klass_ids)).all()
    if not klasses:
        return render_template('error.html',
                               error='Please select at least one class '
                                     'before submitting.')

    for klass in klasses:
        alert = Alert(klass_id=klass.klass_id, contact_type=contact_type,
                      contact=contact)
        db_session.add(alert)
    db_session.commit()

    courses = klasses_to_template_courses(klasses)
    return render_template(
        'alert.html',
        contact_type=contact_type_description(contact_type),
        contact=contact,
        courses=courses,
        success_page=True)
def check_alerts():
    '''send every alert whose should_alert() is true and delete the ones
    that send_alerts() hands back, then report the counts and commit'''
    due = [alert for alert in db_session.query(Alert) if alert.should_alert()]
    delivered = send_alerts(due)

    # only the alerts returned by send_alerts are removed, the rest stay in
    # the db
    for alert in delivered:
        db_session.delete(alert)

    summary = 'Tried to send %d alerts, %d succeeded' % (
        len(due), len(delivered))
    capture_message(summary, level='info')
    db_session.commit()
def scrape_dept(dept_id, name, page):
    '''scrape all the courses in a department

    dept_id and name describe the department, page is appended to the
    classutil root url to get the department's page. The department is
    merged into the db, every course found on the page is passed to
    scrape_course_and_classes() and courses of this department that are in
    the db but were not scraped are deleted, then everything is committed.

    Returns early, without deleting or committing anything, when the page
    has fewer than three tables.
    '''
    # add the dept to the db
    db_session.merge(Dept(dept_id=dept_id, name=name))

    # remove from this as we find courses, eventually only the courses that
    # were not scraped will be left
    courses_to_delete = {
        course.compound_id_tuple: course
        for course in db_session.query(Course).filter_by(dept_id=dept_id)
        .all()}

    # without a timeout a stalled connection would hang the scraper forever
    r = requests.get('http://classutil.unsw.edu.au/' + page, timeout=30)
    dept_page = BeautifulSoup(r.text, 'html.parser')
    tables = dept_page.find_all('table')
    if len(tables) < 3:
        return

    klasses = []
    course_id = ''
    # kept apart from the `name` parameter so the department name isn't
    # clobbered by course names
    course_name = name
    for row in tables[2].find_all('tr'):
        data = row.find_all('td')
        if not data:
            # a row without cells is neither a course title nor a class
            continue
        if (data[0].get('class') or [''])[0] == 'cucourse':
            # row is the code and name of a course
            row_course_id = data[0].b.get_text()[4:8]
            if row_course_id == course_id:
                # every now and again we get multiple title rows for the same
                # course
                continue
            if klasses:
                # scrape all the classes from the previous course and empty
                # the array; class rows seen before any course title have no
                # course to belong to and are dropped
                if course_id:
                    courses_to_delete.pop((dept_id, course_id), None)
                    scrape_course_and_classes(course_id, dept_id,
                                              course_name, klasses)
                klasses = []
            course_id = row_course_id
            course_name = data[1].get_text()
        elif (row.get('class') or [''])[0] in ('rowHighlight',
                                               'rowLowlight'):
            # row is info about a class
            klasses.append(data)

    # scrape the classes from the last course, but only if a course title was
    # actually seen: otherwise a course with an empty id and the department's
    # name would be written to the db
    if course_id:
        courses_to_delete.pop((dept_id, course_id), None)
        scrape_course_and_classes(course_id, dept_id, course_name, klasses)

    for course in courses_to_delete.values():
        db_session.delete(course)
    db_session.commit()
def show_alert():
    '''render alert.html, pre-filled with the classes named in the comma
    separated `classids` query parameter

    Ids that aren't plain integers are ignored. The response is marked
    no-store.
    '''
    raw_ids = request.args.get('classids', '')
    courses = []
    if raw_ids:
        # filter out all non-numeric ids
        # [0-9] and \Z rather than \d and $: \d also matches non-ASCII digits
        # and $ tolerates a trailing newline. Converting to int means the
        # query compares numbers instead of relying on the db to cast
        # arbitrary text.
        klass_ids = [
            int(part) for part in raw_ids.split(',')
            if re.match(r'^[0-9]+\Z', part)
        ]
        # get course info from db (no point querying for an empty id list)
        klasses = []
        if klass_ids:
            klasses = db_session.query(Klass).filter(
                Klass.klass_id.in_(klass_ids)).all()
        courses = klasses_to_template_courses(klasses)

    @after_this_request
    def add_header(response):
        # ask clients and proxies not to store this page
        response.cache_control.no_store = True
        return response

    return render_template('alert.html', courses=courses)
def validate_klass_id(klass_id):
    '''return klass_id converted to an int

    Raises KeyError when no Klass row has that id; whatever int() raises for
    an unconvertible value propagates unchanged.
    '''
    numeric_id = int(klass_id)
    is_known = db_session.query(
        exists().where(Klass.klass_id == numeric_id)).scalar()
    if is_known:
        return numeric_id
    raise KeyError
def scrape_course_and_classes(course_id, dept_id, name, klasses):
    '''scrape all the classes in a course

    klasses is a list of rows, each row being the eight cells of one class:
    type, (unused), class id, (unused), status, enrollment, (unused) and the
    raw time and place text. The course is merged into the db, every class
    is merged with its current status and enrollment, timeslots are rebuilt
    for classes that are new or whose time and place text changed, and
    classes of this course that are in the db but not in klasses are
    deleted. Nothing is committed here.

    Raises AttributeError if an enrollment cell doesn't contain
    "<enrolled>/<capacity>".
    '''
    # local import so this block doesn't depend on the file's import list
    import zlib

    def raw_string_hash(text):
        # the builtin hash() of a string is salted per process on python 3,
        # so a value stored by one run never matches the next run and every
        # timeslot would be rebuilt each time. crc32 is the same everywhere.
        # (values stored with the old scheme simply mismatch once, which
        # triggers a normal rebuild)
        checksum = zlib.crc32(text.encode('utf-8')) & 0xffffffff
        return checksum % POSTGRES_MAX_INT

    # day, time or time range, optional '#', optional bracketed list whose
    # last item is captured as the location
    timeslot_pattern = (
        r'(\w+) +(\d+(?::\d+)?(?:-\d+(?::\d+)?)?) *#? ' +
        r'*(?: *\((?:.*, *)*(.*?)\))?')

    # add the course to the db
    course = Course(course_id=course_id, dept_id=dept_id, name=name)
    db_session.merge(course)

    # remove from this as we find classes, eventually only the classes that are
    # no longer on classutil will appear here
    klasses_to_delete = {klass.klass_id: klass
                         for klass in db_session.query(Klass)
                         .filter_by(course_id=course_id, dept_id=dept_id)
                         .all()}

    # Crappy hack because some course decided to list two classes with the
    # same id...
    klasses_added = set()

    for row in klasses:
        klass_type, _, klass_id, _, status, enrollment, _, time_and_place = (
            d.get_text() for d in row)
        status = web_status_to_db_status(status)
        klass_id = int(klass_id)
        enrollment_match = re.search(r'(\d+)/(\d+).*', enrollment)
        enrolled = int(enrollment_match.group(1))
        capacity = int(enrollment_match.group(2))
        if klass_id in klasses_added:
            continue

        raw_hash = raw_string_hash(time_and_place)
        existing = klasses_to_delete.get(klass_id)

        # if this is a new class or the timeslot raw text has changed since we
        # last saw it
        if existing is None or raw_hash != existing.timeslot_raw_string_hash:
            # if the klass already existed, delete all existing timeslots and
            # recreate them
            if existing is not None:
                for timeslot in existing.timeslots:
                    db_session.delete(timeslot)

            mentioned_times = set()
            for time_and_place_part in time_and_place.split(';'):
                m = re.search(timeslot_pattern, time_and_place_part)
                if not m:
                    continue
                day = web_day_to_int_day(m.group(1))
                time_text = m.group(2)
                if '-' in time_text:
                    start_time, end_time = map(
                        hour_of_day_to_seconds_since_midnight,
                        time_text.split('-'))
                else:
                    # no end given: assume the class runs for one hour
                    start_time = hour_of_day_to_seconds_since_midnight(
                        time_text)
                    end_time = start_time + 60 * 60

                # only add a timeslot for the first time a specific day/time
                # is mentioned to avoid situations where
                # the location changes throughout the semester - we'll only
                # list the first location
                if (day, time_text) in mentioned_times:
                    continue
                mentioned_times.add((day, time_text))

                location = m.group(3)
                # as a last resort, filter out any locations we've
                # extracted that don't have a letter in them
                # also filter out anything that looks like a range of
                # weeks (eg. w1-12)
                if location is not None and (
                        not re.search(r'[a-zA-Z]', location) or
                        re.match(r'^w\d+(?:-\d+)?$', location)):
                    location = None

                timeslot = Timeslot(klass_id=klass_id,
                                    day=day,
                                    start_time=start_time,
                                    end_time=end_time,
                                    location=location)
                db_session.add(timeslot)

        # always merged, even when the timeslots are untouched, so status and
        # enrollment follow the page
        klass = Klass(klass_id=klass_id,
                      course_id=course_id,
                      dept_id=dept_id,
                      klass_type=klass_type,
                      status=status,
                      enrolled=enrolled,
                      capacity=capacity,
                      timeslot_raw_string_hash=raw_hash)
        db_session.merge(klass)
        klasses_to_delete.pop(klass_id, None)
        klasses_added.add(klass_id)

    for klass in klasses_to_delete.values():
        db_session.delete(klass)
def course_info(course_id):
    '''return the course identified by course_id, with its classes, as a
    JSON string

    The id is upper-cased and split into department and course parts by
    validate_course_id(); the lookup uses .one(), so it raises unless
    exactly one Course row matches.
    '''
    dept_id, course_number = validate_course_id(course_id.upper())
    matching = db_session.query(Course).filter_by(dept_id=dept_id,
                                                  course_id=course_number)
    return json.dumps(matching.one().to_dict(with_classes=True))