Beispiel #1
0
    def __init__(self):
        """Open an SSL-secured SMTP connection and authenticate.

        Host and credentials are read from the project secrets store via
        get_secret(); login happens eagerly at construction time.
        """
        # Create server object with SSL option
        self.server = smtplib.SMTP_SSL(get_secret('MAILER_SERVER'))
        self.server.login(get_secret('MAILER_USER'), get_secret('MAILER_PASSWORD'))

        # Default sender address (value redacted in this dump).
        self.sender = '*****@*****.**'
Beispiel #2
0
    def __init__(self):
        """Open an SSL-secured SMTP connection and authenticate.

        Host and credentials are read from the project secrets store via
        get_secret(); login happens eagerly at construction time.
        """
        # Create server object with SSL option
        self.server = smtplib.SMTP_SSL(get_secret('MAILER_SERVER'))
        self.server.login(get_secret('MAILER_USER'),
                          get_secret('MAILER_PASSWORD'))

        # Default sender address (value redacted in this dump).
        self.sender = '*****@*****.**'
Beispiel #3
0
 def get_google_credentials(self):
     """Build GoogleCredentials from the user's linked google-oauth2 account.

     Returns:
         GoogleCredentials populated with the stored access/refresh tokens
         and expiry, or None if the user has no google-oauth2 social auth
         record.
     """
     social_user = self.user.social_auth.filter(provider='google-oauth2').first()
     if social_user is None:
         return None
     access_token = social_user.extra_data["access_token"]
     # refresh_token may be absent from extra_data, so .get() avoids KeyError.
     refresh_token = social_user.extra_data.get("refresh_token")
     expires_at = social_user.extra_data["expires"]
     return GoogleCredentials(access_token, get_secret('SOCIAL_AUTH_GOOGLE_OAUTH2_KEY'),
                              get_secret('SOCIAL_AUTH_GOOGLE_OAUTH2_SECRET'), refresh_token,
                              expires_at,
                              "https://accounts.google.com/o/oauth2/token", 'my-user-agent/1.0')
Beispiel #4
0
    def __new__(cls, *args, **kwargs):
        """Populate class-level credentials before allocating the instance.

        Returns:
            Parser
        """
        # Credentials live on the class so every instance shares them.
        cls.CREDENTIALS = {
            'USERNAME': get_secret('QUEENS_USER'),
            'PASSWORD': get_secret('QUEENS_PASS')
        }
        return object.__new__(cls)
Beispiel #5
0
    def __new__(cls, *args, **kwargs):
        """Attach login credentials to the class, then allocate an instance.

        Returns:
            Parser
        """
        # Credentials live on the class so every instance shares them.
        cls.CREDENTIALS = {
            'username': get_secret('GW_USER'),
            'password': get_secret('GW_PASS'),
            'security_question_answer': get_secret('GW_SECURITY_ANSWER')
        }
        return object.__new__(cls)
Beispiel #6
0
    def __new__(cls, *args, **kwargs):
        """Load portal credentials onto the class before instance creation.

        Returns:
            Parser
        """
        instance = object.__new__(cls)
        # Stored class-wide: every instance reads the same credential dict.
        credentials = {
            "username": get_secret("GW_USER"),
            "password": get_secret("GW_PASS"),
            "security_question_answer": get_secret("GW_SECURITY_ANSWER"),
        }
        cls.CREDENTIALS = credentials
        return instance
Beispiel #7
0
    def __new__(cls, *args, **kwargs):
        """Stash login credentials on the class and hand back a new instance.

        Returns:
            Parser
        """
        credentials = {
            'username': get_secret('GW_USER'),
            'password': get_secret('GW_PASS'),
            'security_question_answer': get_secret('GW_SECURITY_ANSWER')
        }
        cls.CREDENTIALS = credentials
        return object.__new__(cls)
Beispiel #8
0
def set_img_url_google(student, social_user, access_token):
    """Set the student's profile image URL from Google's userinfo endpoint.

    Args:
        student: model instance whose ``img_url`` attribute is set (not saved).
        social_user: linked social-auth record (no longer read; parameter kept
            for interface compatibility with existing callers).
        access_token (str): OAuth2 access token for the Google account.
    """
    # BUG FIX: the original called str.format() on a URL containing no
    # replacement fields, so social_user.uid and get_secret("GOOGLE_API_KEY")
    # were silently discarded. The format call was a no-op; drop it.
    response = requests.get(
        "https://www.googleapis.com/userinfo/v2/me",
        params={"access_token": access_token},
    )
    # NOTE(review): raises KeyError if the response lacks "picture" --
    # preserved from the original behavior.
    student.img_url = response.json()["picture"]
Beispiel #9
0
 def _login(self):
     """Authenticate against Vanderbilt's CAS endpoint and prime the session.

     Fetches the CAS login form, posts the credentials together with the
     form's one-time 'lt' token, then loads the Entry page so subsequent
     requests ride on the authenticated session.
     """
     login_url = 'https://login.mis.vanderbilt.edu'
     params = {'service': Parser.URL + '/j_spring_cas_security_check'}
     soup = self.requester.get(login_url + '/login', params=params)
     # The form's action attribute carries a session-specific path.
     self.requester.post(
         login_url + soup.find('form', {'name': 'loginForm'})['action'],
         parse=False,
         params=params,
         data={
             'username': get_secret('VANDY_USER'),
             'password': get_secret('VANDY_PASS'),
             'lt': soup.find('input', {'name': 'lt'})['value'],
             '_eventId': 'submit',
             'submit': 'LOGIN'
         },
     )
     self.requester.get(Parser.URL + '/Entry.action', parse=False)
Beispiel #10
0
 def _login(self):
     """Authenticate against Vanderbilt's CAS endpoint and prime the session.

     Fetches the CAS login form, posts the credentials together with the
     form's one-time 'lt' token, then loads the Entry page so subsequent
     requests ride on the authenticated session.
     """
     login_url = "https://login.mis.vanderbilt.edu"
     params = {"service": Parser.URL + "/j_spring_cas_security_check"}
     soup = self.requester.get(login_url + "/login", params=params)
     # The form's action attribute carries a session-specific path.
     self.requester.post(
         login_url + soup.find("form", {"name": "loginForm"})["action"],
         parse=False,
         params=params,
         data={
             "username": get_secret("VANDY_USER"),
             "password": get_secret("VANDY_PASS"),
             "lt": soup.find("input", {"name": "lt"})["value"],
             "_eventId": "submit",
             "submit": "LOGIN",
         },
     )
     self.requester.get(Parser.URL + "/Entry.action", parse=False)
Beispiel #11
0
    def __new__(cls, *args, **kwargs):
        """Load the JHU API key onto the class, then allocate the instance.

        Returns:
            Parser
        """
        # BUG FIX: object.__new__() must not be forwarded *args/**kwargs --
        # when __new__ is overridden, extra arguments make object.__new__
        # raise TypeError on Python 3. Allocate with the class alone.
        new_instance = object.__new__(cls)
        cls.KEY = get_secret('JHU_API_KEY')
        return new_instance
Beispiel #12
0
    def __new__(cls, *args, **kwargs):
        """Load the JHU API key onto the class, then allocate the instance.

        Returns:
            Parser
        """
        # BUG FIX: object.__new__() must not be forwarded *args/**kwargs --
        # when __new__ is overridden, extra arguments make object.__new__
        # raise TypeError on Python 3. Allocate with the class alone.
        new_instance = object.__new__(cls)
        cls.KEY = get_secret('JHU_API_KEY')
        return new_instance
Beispiel #13
0
 def _login(self):
     """Authenticate against Vanderbilt's CAS endpoint and prime the session.

     Fetches the CAS login form, posts the credentials together with the
     form's one-time 'lt' token, then loads the Entry page so subsequent
     requests ride on the authenticated session.
     """
     login_url = 'https://login.mis.vanderbilt.edu'
     params = {
         'service': Parser.URL + '/j_spring_cas_security_check'
     }
     soup = self.requester.get(login_url + '/login', params=params)
     # The form's action attribute carries a session-specific path.
     self.requester.post(
         login_url + soup.find('form', {'name': 'loginForm'})['action'],
         parse=False,
         params=params,
         data={
             'username': get_secret('VANDY_USER'),
             'password': get_secret('VANDY_PASS'),
             'lt': soup.find('input', {'name': 'lt'})['value'],
             '_eventId': 'submit',
             'submit': 'LOGIN'
         },
     )
     self.requester.get(Parser.URL + '/Entry.action', parse=False)
Beispiel #14
0
def amazon_textbook_fields(isbn):
    """Look up textbook metadata for *isbn* via the Amazon product API.

    Args:
        isbn (str): ISBN to look up.

    Returns:
        dict with 'detail_url', 'image_url', 'author' and 'title' keys, or
        None when the ISBN is unknown to Amazon.
    """
    # BUG FIX: ``amazon`` is a module-level lazily-built client, but the
    # assignment below made the name function-local, so ``if amazon is None``
    # raised UnboundLocalError before the client was ever created. Declaring
    # it global restores the intended cache-on-first-use behavior.
    global amazon
    if amazon is None:
        amazon = AmazonAPI(get_secret('AMAZON_ACCESS_KEY'),
                           get_secret('AMAZON_SECRET_KEY'),
                           get_secret('AMAZON_ASSOC_TAG'))
    try:
        product = amazon.lookup(ItemId=isbn,
                                IdType='ISBN',
                                SearchIndex='Books')
    except AsinNotFound:
        return None

    # Some lookups return a list of matches; use the first.
    if isinstance(product, list):
        product = product[0]

    return {
        'detail_url': product.detail_page_url,
        'image_url': product.medium_image_url,
        'author': product.author,
        'title': product.title,
    }
def amazon_textbook_fields(isbn):
    """Look up textbook metadata for *isbn* via the Amazon product API.

    Args:
        isbn (str): ISBN to look up.

    Returns:
        dict with 'detail_url', 'image_url', 'author' and 'title' keys, or
        None when the ISBN is unknown to Amazon.
    """
    # BUG FIX: ``amazon`` is a module-level lazily-built client, but the
    # assignment below made the name function-local, so ``if amazon is None``
    # raised UnboundLocalError before the client was ever created. Declaring
    # it global restores the intended cache-on-first-use behavior.
    global amazon
    if amazon is None:
        amazon = AmazonAPI(
            get_secret("AMAZON_ACCESS_KEY"),
            get_secret("AMAZON_SECRET_KEY"),
            get_secret("AMAZON_ASSOC_TAG"),
        )
    try:
        product = amazon.lookup(ItemId=isbn,
                                IdType="ISBN",
                                SearchIndex="Books")
    except AsinNotFound:
        return None

    # Some lookups return a list of matches; use the first.
    if isinstance(product, list):
        product = product[0]

    return {
        "detail_url": product.detail_page_url,
        "image_url": product.medium_image_url,
        "author": product.author,
        "title": product.title,
    }
Beispiel #16
0
# Copyright (C) 2017 Semester.ly Technologies, LLC
#
# Semester.ly is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Semester.ly is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

from semesterly.settings import get_secret

# Queens course-scraper configuration constants.
USER = get_secret('QUEENS_USER')  # portal login, from the secrets store
PASS = get_secret('QUEENS_PASS')
OUTPUT_DIR = "./data-dump"
PROFILE = None
MAX_RETRIES = 10  # presumably per-request retry cap -- confirm in scraper
RETRY_SLEEP_SECONDS = 10
LOG_DIR = "./parsing/schools/queens/qcumber_scraper/logs"
SAVE_TO_DB = False  # writes to JSON if False
Beispiel #17
0
class Parser(BaseParser):
    """Hopkins course parser.

    Attributes:
        API_URL (str): Base URL of the JHU ISIS classes API.
        DAY_TO_LETTER_MAP (dict): Lowercase day abbreviation -> letter code.
        KEY (str): ISIS API key, read from the secrets configuration.
        last_course (dict): Most recently ingested course; used to detect
            alternate names for the same offering code.
        schools (list): School records fetched from the API.
        semester (TYPE): '<Term> <Year>' string for the semester in progress.
        verbosity (TYPE): Verbosity level supplied to start().
    """

    API_URL = 'https://isis.jhu.edu/api/classes/'
    KEY = get_secret('JHU_API_KEY')
    # One-letter day codes: R = Thursday, S = Saturday, U = Sunday.
    DAY_TO_LETTER_MAP = {
        'm': 'M',
        't': 'T',
        'w': 'W',
        'th': 'R',
        'f': 'F',
        'sa': 'S',
        's': 'U'
    }

    def __init__(self, **kwargs):
        """Construct hopkins parser object."""
        self.schools = []
        self.last_course = {}
        super(Parser, self).__init__('jhu', **kwargs)

    def _get_schools(self):
        """Fetch the school list from the API and cache it on self.schools."""
        url = '{}/codes/schools'.format(Parser.API_URL)
        params = {'key': Parser.KEY}
        self.schools = self.requester.get(url, params=params)

    def _get_courses(self, school):
        """Fetch all courses for *school* in the current semester."""
        url = '{}/{}/{}'.format(Parser.API_URL, school['Name'], self.semester)
        params = {'key': Parser.KEY}
        return self.requester.get(url, params=params)

    def _get_section(self, course):
        """Fetch section details for a single course record."""
        return self.requester.get(self._get_section_url(course))

    def _get_section_url(self, course):
        """Build the section-detail API URL for *course* this semester."""
        return Parser.API_URL + '/' \
            + course['OfferingName'].replace(".", "") + course['SectionName'] \
            + '/' + self.semester + '?key=' + Parser.KEY

    def _parse_schools(self):
        """Parse every school previously fetched by _get_schools()."""
        for school in self.schools:
            self._parse_school(school)

    def _parse_school(self, school):
        """Fetch and ingest every course/section belonging to *school*."""
        courses = self._get_courses(school)
        for course in courses:
            section = self._get_section(course)
            if len(section) == 0:
                # FIXME - make this less hacky
                # Record URLs that returned no section data for inspection.
                # NOTE(review): mode 'w' truncates on each write, so only the
                # last offending URL survives -- confirm this is intended.
                hacky_log_file = 'parsing/schools/jhu/logs/section_url_tracking.log'
                with open(hacky_log_file, 'w') as f:
                    print(self._get_section_url(course), file=f)
                continue
            self._load_ingestor(course, section)

    def _compute_size_enrollment(self, course):
        """Derive (size, enrollment, waitlist) counts from a course record.

        Missing or malformed numbers degrade to 0 (or -1 for the waitlist)
        rather than aborting the parse.
        """
        # NOTE(review): the bare excepts below also swallow KeyError etc.;
        # narrowing to (KeyError, ValueError) would be safer.
        try:
            section_size = int(course['MaxSeats'])
        except:
            section_size = 0
        try:
            section_enrolment = section_size \
                - int(course['SeatsAvailable'].split("/")[0])
            if section_enrolment < 0:
                section_enrolment = 0
        except:
            section_enrolment = 0
        try:
            waitlist = int(course.get('Waitlisted', -1))
        except ValueError:
            waitlist = -1
        return (section_size, section_enrolment, waitlist)

    def _load_ingestor(self, course, section):
        """Populate the ingestor from course/section data and ingest it."""
        section_details = section[0]['SectionDetails']
        try:
            num_credits = float(course['Credits'])
        except:
            num_credits = 0

        # Load core course fields
        # NOTE(review): '+=' on the results of filter()/map() in this method
        # assumes they return lists (Python 2 semantics) -- confirm the
        # interpreter version before porting.
        self.ingestor['areas'] = filter(lambda a: a != "None",
                                        course['Areas'].split(','))
        if course['IsWritingIntensive'] == "Yes":
            self.ingestor['areas'] += ['Writing Intensive']

        if len(section_details[0]['Prerequisites']) > 0:
            prereqs = []
            for p in section_details[0]['Prerequisites']:
                prereqs.append(p['Description'])
            self.ingestor['prerequisites'] = ' '.join(prereqs)

        # Course level is the first character after the second dot of the
        # offering name (e.g. 'AS.020.101' -> '1') followed by '00'.
        self.ingestor['level'] = re.findall(re.compile(r".+?\..+?\.(.{1}).+"),
                                            course['OfferingName'])[0] + "00"
        self.ingestor['name'] = titlize(course['Title'])
        self.ingestor['description'] = section_details[0]['Description']
        self.ingestor['code'] = course['OfferingName'].strip()
        self.ingestor['num_credits'] = num_credits
        self.ingestor['department_name'] = ' '.join(
            course['Department'].split()[1:])
        self.ingestor['campus'] = 1
        self.ingestor['exclusions'] = section_details[0].get(
            'EnrollmentRestrictedTo')

        # Add specialty areas for computer science department
        if course['Department'] == 'EN Computer Science':
            cs_areas_re = r'\bApplications|\bAnalysis|\bSystems|\bGeneral'
            for match in re.findall(cs_areas_re, self.ingestor['description']):
                self.ingestor['areas'] += [match]

        created_course = self.ingestor.ingest_course()
        # Same offering code but a different title: treat as a named section.
        if self.last_course \
           and created_course['code'] == course['OfferingName'].strip() \
           and created_course['name'] != course['Title']:
            self.ingestor['section_name'] = course['OfferingName'].strip()
        self.last_course = created_course

        for meeting in section_details[0]['Meetings']:
            # Load core section fields
            self.ingestor['section'] = "(" + section[0]['SectionName'] + ")"
            self.ingestor['semester'] = self.semester.split()[0]
            self.ingestor['instrs'] = map(lambda i: i.strip(),
                                          course['Instructors'].split(','))
            self.ingestor['size'], self.ingestor['enrollment'], self.ingestor[
                'waitlist'] = self._compute_size_enrollment(course)
            self.ingestor['year'] = self.semester.split()[1]

            created_section = self.ingestor.ingest_section(created_course)

            # Load offering fields.
            times = meeting['Times']
            for time in filter(lambda t: len(t) > 0, times.split(',')):
                time_pieces = re.search(
                    r'(\d\d:\d\d [AP]M) - (\d\d:\d\d [AP]M)', time)
                self.ingestor['time_start'] = time_12to24(time_pieces.group(1))
                self.ingestor['time_end'] = time_12to24(time_pieces.group(2))
                # Skip meetings with no usable day-of-week information.
                if (len(meeting['DOW'].strip()) > 0 and meeting['DOW'] != "TBA"
                        and meeting['DOW'] != "None"):
                    self.ingestor['days'] = map(
                        lambda d: Parser.DAY_TO_LETTER_MAP[d.lower()],
                        re.findall(r'([A-Z][a-z]*)+?', meeting['DOW']))
                    self.ingestor['location'] = {
                        'building': meeting['Building'],
                        'room': meeting['Room']
                    }
                    self.ingestor.ingest_meeting(created_section)

    def start(self,
              years=None,
              terms=None,
              years_and_terms=None,
              departments=None,
              textbooks=True,
              verbosity=3,
              **kwargs):
        """Start parse."""
        self.verbosity = verbosity

        # Default to hardcoded current year.
        if not years:
            years = ['2017', '2016']
        if not terms:
            terms = ['Spring', 'Fall', 'Summer']

        # Run parser for all semesters specified.
        for year in years:
            for term in terms:
                self.semester = '{} {}'.format(term, year)
                self._get_schools()
                self._parse_schools()
Beispiel #18
0
class Parser(QPeoplesoftParser):
    """Course parser for Queens University."""

    BASE_URL = 'https://saself.ps.queensu.ca/psc/saself/EMPLOYEE/HRMS/c/'\
               'SA_LEARNER_SERVICES.CLASS_SEARCH.GBL'
    # Portal credentials, read from the secrets store.
    CREDENTIALS = {
        'USERNAME': get_secret('QUEENS_USER'),
        'PASSWORD': get_secret('QUEENS_PASS')
    }

    def __init__(self, **kwargs):
        """Construct parsing object."""
        params = {
            'Page': 'SSR_CLSRCH_ENTRY',
            'Action': 'U',
            'ExactKeys': 'Y',
            'TargetFrameName': 'None'
        }
        # Configure headless PhantomJS: long resource timeout, no image
        # loading, and a desktop-browser user agent string.
        self.cap = webdriver.DesiredCapabilities.PHANTOMJS
        self.cap["phantomjs.page.settings.resourceTimeout"] = 50000000
        self.cap["phantomjs.page.settings.loadImages"] = False
        self.cap[
            "phantomjs.page.settings.userAgent"] = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:16.0) Gecko/20121026 Firefox/16.0'
        self.driver = webdriver.PhantomJS(desired_capabilities=self.cap)
        # NOTE: comment being saved in case this is important for local dev.
        # self.driver = webdriver.PhantomJS(
        #     './node_modules/phantomjs-prebuilt/bin/phantomjs',
        #     desired_capabilities=self.cap
        # )
        # self.driver = webdriver.Chrome()  # FOR DEBUG PURPOSES ONLY

        super(Parser, self).__init__('queens',
                                     Parser.BASE_URL,
                                     url_params=params,
                                     **kwargs)

    def seleni_run(self, execute):
        """Run selenium routine."""
        # NOTE(review): the bare except retries forever -- a persistent
        # selenium failure makes this loop spin indefinitely; consider
        # narrowing the exception type and capping retries.
        while True:
            try:
                return execute()
            except:
                continue

    def login(self):
        """Login to Queens course listings website."""
        socket.setdefaulttimeout(60)
        self.driver.set_page_load_timeout(30)
        self.driver.implicitly_wait(30)
        # Drive the SSO login form in the headless browser.
        self.driver.get('https://my.queensu.ca/')
        self.seleni_run(lambda: self.driver.find_element_by_id('username').
                        send_keys(Parser.CREDENTIALS['USERNAME']))
        self.seleni_run(lambda: self.driver.find_element_by_id('password').
                        send_keys(Parser.CREDENTIALS['PASSWORD']))
        self.seleni_run(lambda: self.driver.find_element_by_class_name(
            'form-button').click())
        self.seleni_run(
            lambda: self.driver.find_element_by_link_text("SOLUS").click())

        # Focus iframe
        iframe = self.seleni_run(lambda: self.driver.find_element_by_xpath(
            "//iframe[@id='ptifrmtgtframe']"))
        self.driver.switch_to_frame(iframe)

        self.seleni_run(
            lambda: self.driver.find_element_by_link_text("Search").click())

        # transfer Selenium cookies to Requester cookies
        for cookie in self.driver.get_cookies():
            c = {cookie['name']: cookie['value']}
            self.requester.session.cookies.update(c)

        # Close Selenium/PhantomJS process.
        # REF: http://stackoverflow.com/questions/25110624/how-to-properly-stop-phantomjs-execution
        # NOTE: update selenium version after fix released
        #  (https://github.com/hydroshare/hydroshare/commit/f7ef2a867250aac86b3fd12821cabf5524c2cb17)
        self.driver.close()
        self.driver.service.process.send_signal(signal.SIGTERM)
        self.driver.quit()

        # Browser-like headers so subsequent plain HTTP requests look like
        # the same browser session the cookies came from.
        headers = {
            'Pragma': 'no-cache',
            'Accept-Encoding': 'gzip, deflate, sdch, br',
            'Accept-Language': 'en-US,en;q=0.8',
            'Upgrade-Insecure-Requests': '1',
            'User-Agent':
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36',
            'Accept':
            'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Referer':
            'https://saself.ps.queensu.ca/psc/saself/EMPLOYEE/HRMS/c/SA_LEARNER_SERVICES.SSS_STUDENT_CENTER.GBL?PortalActualURL=https%3a%2f%2fsaself.ps.queensu.ca%2fpsc%2fsaself%2fEMPLOYEE%2fHRMS%2fc%2fSA_LEARNER_SERVICES.SSS_STUDENT_CENTER.GBL&PortalContentURL=https%3a%2f%2fsaself.ps.queensu.ca%2fpsc%2fsaself%2fEMPLOYEE%2fHRMS%2fc%2fSA_LEARNER_SERVICES.SSS_STUDENT_CENTER.GBL&PortalContentProvider=HRMS&PortalCRefLabel=Student%20Center&PortalRegistryName=EMPLOYEE&PortalServletURI=https%3a%2f%2fsaself.ps.queensu.ca%2fpsp%2fsaself%2f&PortalURI=https%3a%2f%2fsaself.ps.queensu.ca%2fpsc%2fsaself%2f&PortalHostNode=HRMS&NoCrumbs=yes&PortalKeyStruct=yes',
            'Connection': 'keep-alive',
            'Cache-Control': 'no-cache',
        }

        self.requester.headers = headers

        # NOTE: get request will update CookieJar
        self.requester.get(Parser.BASE_URL,
                           params={
                               'Page': 'SSR_CLSRCH_ENTRY',
                               'Action': 'U',
                               'ExactKeys': 'Y',
                               'TargetFrameName': 'None'
                           })

    def start(self, verbosity=3, **kwargs):
        """Start parse."""
        self.login()
        super(Parser, self).start(verbosity=verbosity, **kwargs)
Beispiel #19
0
from rest_framework.views import APIView

from analytics.models import SharedTimetable
from analytics.views import save_analytics_timetable
from courses.serializers import CourseSerializer
from student.utils import get_student
from timetable.serializers import DisplayTimetableSerializer
from timetable.models import Semester, Course, Section
from timetable.utils import (
    update_locked_sections,
    courses_to_timetables,
)
from helpers.mixins import ValidateSubdomainMixin, FeatureFlowView, CsrfExemptMixin
from semesterly.settings import get_secret

# Hashids codec for encoding/decoding numeric ids in URLs; the salt comes
# from the project secrets store.
hashids = Hashids(salt=get_secret("HASHING_SALT"))
logger = logging.getLogger(__name__)


class TimetableView(CsrfExemptMixin, ValidateSubdomainMixin, APIView):
    """
    This view is responsible for responding to any requests dealing with the
    generation of timetables and the satisfaction of constraints provided by
    the frontend/user.
    """
    def post(self, request):
        """Generate best timetables given the user's selected courses"""
        school = request.subdomain
        params = request.data
        student = get_student(request)
        course_ids = list(params["courseSections"].keys())
Beispiel #20
0
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

from rest_framework.views import APIView
from rest_framework.response import Response
from rest_framework import status
from django.shortcuts import get_object_or_404
from hashids import Hashids

from jhu_final_exam_scheduler import JHUFinalExamScheduler
from helpers.mixins import FeatureFlowView, CsrfExemptMixin
from exams.models import FinalExamShare
from student.utils import get_student
from semesterly.settings import get_secret

# Hashids codec used below to decode exam-share slugs back to numeric ids;
# the salt comes from the project secrets store.
hashids = Hashids(salt=get_secret('HASHING_SALT'))


# TODO: use new request shape
class ExamView(CsrfExemptMixin, APIView):
    """API endpoint that computes a JHU final-exam schedule."""

    def post(self, request):
        """Return the exam schedule computed from the posted timetable data."""
        final_exam_schedule = JHUFinalExamScheduler().make_schedule(request.data)
        return Response(final_exam_schedule, status=status.HTTP_200_OK)


class ExamLink(FeatureFlowView):
    feature_name = 'SHARE_EXAM'

    def get_feature_flow(self, request, slug):
        exam_id = hashids.decrypt(slug)[0]
Beispiel #21
0
class Parser(CourseParser):
    """George Washington University course parser.

    NOTE: GW cannot support multiple login!
    """

    # Banner self-service base URL.
    URL = 'https://banweb.gwu.edu/PRODCartridge'
    # Login credentials, read from the secrets store.
    CREDENTIALS = {
        'USERNAME': get_secret('GW_USER'),
        'PASSWORD': get_secret('GW_PASS'),
        'SECURITY_QUESTION_ANSWER': get_secret('GW_SECURITY_ANSWER')
    }
    # Supported years, each mapping term name -> Banner term code.
    YEARS_AND_TERMS = {
        2017: {
            'Fall': '201703',
            'Spring': '201701',
        },
        2016: {
            'Fall': '201603',
        }
    }

    def __init__(self, **kwargs):
        """Construct GW parser object.

        Args:
            **kwargs: pass-through
        """
        super(Parser, self).__init__('gw', **kwargs)

    def start(self,
              years=None,
              terms=None,
              years_and_terms=None,
              departments=None,
              verbosity=3,
              **kwargs):
        """Start parse."""
        self._login()
        self._direct_to_search_page()

        # Narrow the hardcoded YEARS_AND_TERMS map by the caller's filters.
        years_and_terms = filter_years_and_terms(
            Parser.YEARS_AND_TERMS,
            years_filter=years,
            terms_filter=terms,
            years_and_terms_filter=years_and_terms)

        for year, terms in years_and_terms.items():
            self.ingestor['year'] = year
            for term_name in terms:
                term_code = Parser.YEARS_AND_TERMS[year][term_name]
                self.ingestor['term'] = term_name

                # Retrieve term search page.
                soup = self.requester.get(Parser.URL +
                                          '/bwckgens.p_proc_term_date',
                                          params={
                                              'p_calling_proc': 'P_CrseSearch',
                                              'p_term': term_code
                                          })

                # Create search param list.
                input_options_soup = soup.find(
                    'form',
                    action='/PRODCartridge/bwskfcls.P_GetCrse').find_all(
                        'input')

                query = {}
                for input_option in input_options_soup:
                    query[input_option['name']] = input_option.get('value', '')
                query.update({
                    'begin_hh': '0',
                    'begin_mi': '0',
                    'end_hh': '0',
                    'end_mi': '0',
                    'sel_ptrm': '%',
                    'SUB_BTN': 'Section Search'
                })

                # Construct list of departments.
                depts = {}
                depts_soup = soup.find('select',
                                       id='subj_id').find_all('option')
                for dept_soup in depts_soup:
                    depts[dept_soup.text.strip()] = dept_soup['value']

                # NOTE(review): dict.iteritems() exists only on Python 2.
                for dept_name, dept_code in depts.iteritems():
                    self.ingestor['department'] = {
                        'name': dept_name,
                        'code': dept_code
                    }

                    query['sel_subj'] = ['dummy', dept_code]

                    rows = self.requester.post(Parser.URL +
                                               '/bwskfcls.P_GetCrse',
                                               params=query)

                    Parser._check_errorpage(rows)

                    try:
                        rows = rows.find(
                            'table',
                            class_='datadisplaytable').find_all('tr')[2:]
                    except AttributeError:
                        print('message: no results for department',
                              dept_name,
                              file=sys.stderr)
                        continue  # no results for department

                    # collect offered courses in department
                    for row in rows:
                        info = row.find_all('td')
                        if info[1].find('a'):

                            # general info
                            self.ingestor.update({
                                'ident':
                                info[1].text,
                                'code':
                                info[2].text + ' ' + info[3].text,
                                'href':
                                info[1].find('a')['href'],
                                'dept':
                                dept_name,
                                'selec':
                                info[3].text,
                                'section':
                                info[4].text,
                                'credits':
                                safe_cast(info[6].text, float, default=0.),
                                'name':
                                info[7].text,
                                'size':
                                int(info[10].text),
                                'enrollment':
                                int(info[11].text),
                                'waitlist':
                                safe_cast(info[14].text, int, default=-1),
                                'attr':
                                '; '.join(info[22].text.split(' and '))
                                if len(info) == 23 else ''  # FIXME - hacky fix
                            })

                            # Query course catalog to obtain description.
                            catalog = self.requester.get(
                                Parser.URL + '/bwckctlg.p_display_courses',
                                params={
                                    'term_in': term_code,
                                    'one_subj': dept_code,
                                    'sel_crse_strt': self.ingestor['selec'],
                                    'sel_crse_end': self.ingestor['selec'],
                                    'sel_subj': '',
                                    'sel_levl': '',
                                    'sel_schd': '',
                                    'sel_coll': '',
                                    'sel_divs': '',
                                    'sel_dept': '',
                                    'sel_attr': ''
                                })

                            if catalog:
                                self.ingestor.update(
                                    Parser._parse_catalogentrypage(catalog))

                            course = self.ingestor.ingest_course()

                            section_soup = self.requester.get(
                                Parser.URL + '/bwckschd.p_disp_listcrse',
                                params={
                                    'term_in': term_code,
                                    'subj_in': dept_code,
                                    'crse_in': self.ingestor['selec'],
                                    'crn_in': self.ingestor['ident']
                                })

                            meetings_soup = Parser._extract_meetings(
                                section_soup)
                            """Example of a meeting entry
                            <tr>
                                <td class="dddefault">Class</td>
                                <td class="dddefault">4:00 pm - 6:00 pm</td>
                                <td class="dddefault">T</td>
                                <td class="dddefault">See Department DEPT</td>
                                <td class="dddefault">08/28/17 - 12/11/17</td>
                                <td class="dddefault">Lecture</td>
                                <td class="dddefault">Timothy A.  McCaffrey (<abbr title="Primary">P</abbr>), David   Leitenberg </td>
                            </tr>
                            """

                            self._parse_instructors(meetings_soup)

                            if len(meetings_soup) > 0:
                                self.ingestor['section_type'] = meetings_soup[
                                    0].find_all('td')[5].text
                                section_model = self.ingestor.ingest_section(
                                    course)

                            # NOTE(review): if meetings_soup is empty,
                            # section_model is unbound here -- confirm
                            # meetings are always present.
                            self._parse_meetings(meetings_soup, section_model)

    def _login(self):
        """Log in to GW Banner, including the security-question page.

        Raises:
            Exception: if the credential POST does not return HTTP 200.
        """
        # Collect necessary cookies
        self.requester.get(Parser.URL + '/twbkwbis.P_WWWLogin', parse=False)

        # Banner rejects logins without the login page as Referer.
        self.requester.headers['Referer'] = '{}/twbkwbis.P_WWWLogin'.format(
            Parser.URL)

        logged_in = self.requester.post(Parser.URL + '/twbkwbis.P_ValLogin',
                                        parse=False,
                                        data={
                                            'sid':
                                            Parser.CREDENTIALS['USERNAME'],
                                            'PIN':
                                            Parser.CREDENTIALS['PASSWORD']
                                        })

        if logged_in.status_code != 200:
            print('Unexpected error: login unsuccessful',
                  sys.exc_info()[0],
                  file=sys.stderr)
            raise Exception('GW Parser, failed login')

        # Deal with security question page.
        self.requester.post('{}/twbkwbis.P_ProcSecurityAnswer'.format(
            Parser.URL),
                            parse=False,
                            data={
                                'RET_CODE':
                                '',
                                'SID':
                                Parser.CREDENTIALS['USERNAME'],
                                'QSTN_NUM':
                                1,
                                'answer':
                                Parser.CREDENTIALS['SECURITY_QUESTION_ANSWER']
                            })

    def _direct_to_search_page(self):
        """Walk the Banner menu chain so the session reaches the search page.

        Each menu page must actually be requested, in order, to prime the
        server-side session before loading the course-search form.
        """
        genurl = Parser.URL + '/twbkwbis.P_GenMenu'
        actions = ['bmenu.P_MainMnu', 'bmenu.P_StuMainMnu', 'bmenu.P_RegMnu']
        # BUG FIX: map() is lazy in Python 3 and was never consumed, so
        # the original code never issued these GET requests.
        for action in actions:
            self.requester.get(genurl, params={'name': action})
        self.requester.get(Parser.URL + '/bwskfcls.P_CrseSearch',
                           parse=False,
                           params={'term_in': ''})

    def _parse_meetings(self, meetings_soup, section_model):
        """Ingest time/day/location for each meeting row of a section.

        Args:
            meetings_soup: iterable of bs4 ``<tr>`` rows, one per meeting.
            section_model: section model the meetings belong to.
        """
        for meeting_soup in meetings_soup:
            col = meeting_soup.find_all('td')
            time = re.match(r'(.*) - (.*)', col[1].text)
            if not time:
                continue
            self.ingestor['time_start'] = self.extractor.time_12to24(
                time.group(1))
            self.ingestor['time_end'] = self.extractor.time_12to24(
                time.group(2))
            self.ingestor['days'] = [col[2].text]
            # BUG FIX: filter() is a lazy iterator in Python 3, so the
            # original len() call raised TypeError.  Materialize as a
            # list; entries that are only non-breaking spaces count as
            # empty.
            filtered_days = [d for d in self.ingestor['days']
                             if d.replace(u'\xa0', u'')]
            if len(filtered_days) == 0:
                break
            self.ingestor['location'] = col[3].text
            self.ingestor.ingest_meeting(section_model)

    def _parse_instructors(self, meetings):
        """Collect instructor names from each meeting row into the ingestor."""
        self.ingestor['instrs'] = []
        for row in meetings:
            names = row.find_all('td')[6].text.split(',')

            # NOTE: must constrain instructor length LAW 6683
            for name in names[:20]:
                # Collapse runs of internal whitespace to single spaces.
                name = ' '.join(name.split())

                # Strip the trailing " (P)" primary-instructor marker.
                name = re.match(r'(.*?)(?: \(P\))?$', name).group(1)

                self.ingestor['instrs'].append(name)

    @staticmethod
    def _parse_catalogentrypage(soup):
        """Extract description and labeled fields from a catalog entry page.

        Returns an empty dict when the data table is absent.
        """
        meat = soup.find('body').find('table', class_='datadisplaytable')
        if meat is None:
            return {}
        fields = {'descr': Parser._extract_description(meat)}
        fields.update(
            Parser._extract_info(meat.find('td', class_='ntdefault')))
        return fields

    @staticmethod
    def _extract_description(soup):
        """Pull the free-text course description out of the catalog table.

        Returns:
            str: the description, or '' when the expected row or markup
            is missing.
        """
        try:
            meat = soup.find_all('tr', recursive=False)[1].find('td')
            descr = re.match(r'<td .*?>\n([^<]+)<[^$]*</td>', meat.prettify())
            return ' '.join(descr.group(1).strip().splitlines())
        # BUG FIX: the original bare `except:` also swallowed
        # SystemExit/KeyboardInterrupt.  Catch only what this lookup can
        # produce: a missing row (IndexError) or a failed find/match
        # returning None (AttributeError).
        except (AttributeError, IndexError):
            return ''

    @staticmethod
    def _extract_info(soup):
        """Map labeled catalog fields to ingestor keys.

        Pairs each ``<span class="fieldlabeltext">`` label with the text
        node that follows it, then keeps only the labels listed in
        ``extraction``, renamed and transformed for the ingestor.

        Returns:
            dict: subset of {'section_type', 'info', 'areas'}.
        """
        # Link field in <span> tag to text proceeding it.
        fields = {}
        for t in soup.find_all('span', class_='fieldlabeltext'):
            data = t.next_sibling

            # Skip newline tags.
            while data and isinstance(data, Tag) and data.name == 'br':
                data = data.next_sibling

            # Non-text nodes are flattened to their inner text.
            if not isinstance(data, NavigableString):
                data = data.text
            # Drop the label's trailing character (presumably ':').
            fields[t.text.strip()[:-1]] = data

        # Recognized catalog label -> (ingestor key, transform).
        extraction = {
            'Schedule Types': ('section_type', lambda s: s[0].upper()),
            'Levels': ('info', lambda s: 'Levels: ' + s.strip()),
            'Course Attributes': ('areas', lambda x: x.strip().split(','))
        }

        # Filter and map over (header, content) pairs.
        extracted = {}
        for name, data in fields.items():
            if extraction.get(name):
                extracted[extraction[name][0]] = extraction[name][1](data)

        return extracted

    @staticmethod
    def _extract_meetings(soup):
        """Return the meeting ``<tr>`` rows of the nested display table, or []."""
        outer = soup.find('table', class_='datadisplaytable')
        inner = outer.find('table', class_='datadisplaytable') if outer else None
        # Skip the header row of the inner table.
        rows = inner.find_all('tr')[1:] if inner else None
        return rows or []

    @staticmethod
    def _check_errorpage(soup):
        """Raise CourseParseError when the page carries a Banner error span."""
        error = soup.find('span', class_='errortext')
        if error:
            raise CourseParseError(
                'Error on page request, message: ' + error.text)
Beispiel #22
0
# Copyright (C) 2017 Semester.ly Technologies, LLC
#
# Semester.ly is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Semester.ly is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

from semesterly.settings import get_secret

# Queen's portal credentials come from Semester.ly's secrets store.
USER = get_secret('QUEENS_USER')
PASS = get_secret('QUEENS_PASS')
OUTPUT_DIR = "./data-dump"  # where scraped JSON is written when SAVE_TO_DB is False
PROFILE = None  # presumably a scraper profile selector; None = default -- TODO confirm
MAX_RETRIES = 10  # request attempts before giving up
RETRY_SLEEP_SECONDS = 10  # pause between retry attempts
LOG_DIR = "./parsing/schools/queens/qcumber_scraper/logs"
SAVE_TO_DB = False # writes to JSON if False
Beispiel #23
0
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

import json
import urllib.request, urllib.error, urllib.parse

import requests
from django.conf import settings
from django.contrib.auth.models import User
from django.core.signing import TimestampSigner, BadSignature, SignatureExpired
from hashids import Hashids

from student.models import Student
from semesterly.settings import get_secret

# Module-level Hashids encoder, salted with the deployment secret so the
# generated ids are stable for a given deployment.
hashids = Hashids(salt=get_secret('HASHING_SALT'))


def check_student_token(student, token):
    """
    Validates a token: checks that it is at most 2 days old and that it
    matches the currently authenticated student.
    """
    signed_value = '%s:%s' % (student.id, token)
    try:
        # Valid for 2 days.
        TimestampSigner().unsign(signed_value, max_age=60 * 60 * 48)
    except (BadSignature, SignatureExpired):
        return False
    return True

Beispiel #24
0
def create_student(strategy, details, response, user, *args, **kwargs):
    """
    Part of the Python Social Auth pipeline which creates a student upon
    signup. If student already exists, updates information from Facebook
    or Google (depending on the backend).
    Saves friends and other information to fill database.
    """
    backend_name = kwargs['backend'].name
    # Get, or lazily create, the Student row for this user.
    if Student.objects.filter(user=user).exists():
        new_student = Student.objects.get(user=user)
    else:
        new_student = Student(user=user)
        new_student.save()
    social_user = user.social_auth.filter(
        provider=backend_name,
    ).first()

    if backend_name == 'google-oauth2' and not user.social_auth.filter(
            provider='facebook').exists():
        # extra_data may be a dict or a JSON string depending on the
        # social-auth storage backend.
        try:
            access_token = social_user.extra_data["access_token"]
        except TypeError:
            access_token = json.loads(social_user.extra_data)["access_token"]
        # FIX: the original called .format() with arguments on a URL that
        # has no placeholders -- the arguments were silently ignored.
        # (Local renamed from `response`, which shadowed the parameter.)
        profile_response = requests.get(
            'https://www.googleapis.com/userinfo/v2/me',
            params={'access_token': access_token}
        )
        new_student.img_url = profile_response.json()['picture']
        new_student.save()

    elif backend_name == 'facebook':

        try:
            access_token = social_user.extra_data["access_token"]
        except TypeError:
            access_token = json.loads(social_user.extra_data)["access_token"]

        if social_user:
            new_student.img_url = 'https://graph.facebook.com/v9.0/' + social_user.uid + '/picture?type=normal'
            new_student.fbook_uid = social_user.uid
            new_student.save()
            # FIX: removed a dead urllib Request built from a profile URL
            # that was never sent.  Only the friends request below is used.
            url = 'https://graph.facebook.com/{0}/' \
                  'friends?fields=id' \
                  '&access_token={1}'.format(
                      social_user.uid,
                      access_token,
                  )
            request = urllib.request.Request(url)
            friends = json.loads(
                urllib.request.urlopen(request).read().decode('utf-8')
            ).get('data')

            # Link any friends who already have Semester.ly accounts.
            for friend in friends:
                if Student.objects.filter(fbook_uid=friend['id']).exists():
                    friend_student = Student.objects.get(
                        fbook_uid=friend['id'])
                    if not new_student.friends.filter(
                            user=friend_student.user).exists():
                        new_student.friends.add(friend_student)
                        new_student.save()
                        friend_student.save()

    return kwargs
    
Beispiel #25
0
class Parser(BaseParser):
    """Vanderbilt course parser.

    Scrapes the Vanderbilt "more" web app for courses, sections and
    meeting times, feeding each into the ingestor.

    Attributes:
        API_URL (str): base URL of the Vanderbilt course web app.
        course (dict): scratch map of fields for the course currently
            being parsed (cleared after each course).
        CREDENTIALS (dict): portal username/password from the secrets
            store.
        departments (dict): department code -> department title, filled
            by extract_department_codes().
        SCHOOL (str): school identifier.  NOTE(review): not set in the
            visible code -- presumably provided by BaseParser; confirm.
        verbosity (int): console logging level, set in start().
    """

    API_URL = 'https://webapp.mis.vanderbilt.edu/more'
    # Portal login credentials, sourced from the secrets store.
    CREDENTIALS = {
        'USERNAME': get_secret('VANDY_USER'),
        'PASSWORD': get_secret('VANDY_PASS')
    }
    def __init__(self, **kwargs):
        """Initialize parser state and register with the base parser.

        Args:
            **kwargs: forwarded unchanged to BaseParser.
        """
        self.departments = {}
        self.course = {'description': '', 'cancelled': False}
        super(Parser, self).__init__('vandy', **kwargs)

    def login(self):
        """Authenticate against Vanderbilt's CAS login and open a session.

        Fetches the login form to harvest the one-time 'lt' token, posts
        the credentials, then loads the app entry page to establish the
        session.
        """
        if self.verbosity > 2:
            print("Logging in...")
        login_url = 'https://login.mis.vanderbilt.edu'
        get_login_url = login_url + '/login'
        params = {
            'service': Parser.API_URL + '/j_spring_cas_security_check'
        }
        soup = self.requester.get(get_login_url, params)
        # CAS embeds the form action and a one-time security token ('lt')
        # in the login page; both must be echoed back in the post.
        post_suffix_url = soup.find('form', {'name': 'loginForm'})['action']
        sec_block = soup.find('input', {'name': 'lt'})['value']
        login_info = {
            'username': Parser.CREDENTIALS['USERNAME'],
            'password': Parser.CREDENTIALS['PASSWORD'],
            'lt': sec_block,
            '_eventId': 'submit',
            'submit': 'LOGIN'
        }
        self.requester.post(login_url + post_suffix_url,
                            login_info, params,
                            parse=False)
        self.requester.get(Parser.API_URL + '/Entry.action',
                           parse=False)

    def start(self,
              years=None,
              terms=None,
              departments=None,
              textbooks=True,
              verbosity=3):
        """Entry point: log in and parse the requested years and terms.

        Args:
            years: year strings to parse (None = all hard-coded years).
            terms: term names to parse (None = all hard-coded terms).
            departments: department codes to parse (None = all).
            textbooks: unused in the visible code -- presumably consumed
                by related parsers; confirm before removing.
            verbosity: console logging level (higher = chattier).
        """

        self.verbosity = verbosity

        self.login()

        # TODO - read from site and filter based on kwargs
        years_and_terms = {
            '2016': {
                'Fall': '0875'
            },
            '2017': {
                'Spring': '0880',
                'Fall': '0895',
                'Summer': '0885',
            }
        }

        years_and_terms = self.extractor.filter_term_and_year(
            years_and_terms,
            years,
            terms
        )

        for year, semesters in years_and_terms.items():
            if self.verbosity >= 1:
                print('>   Parsing year ' + year)
            self.ingestor['year'] = year

            for semester_name, semester_code in semesters.items():

                if self.verbosity >= 1:
                    print('>>  Parsing semester ' + semester_name)
                self.ingestor['semester'] = semester_name

                # Load environment for targeted semester
                self.requester.get(
                    '{}{}'.format(
                        Parser.API_URL,
                        '/SelectTerm!selectTerm.action'),
                    {'selectedTermCode': semester_code},
                    parse=False)

                self.requester.get(
                    '{}{}'.format(
                        Parser.API_URL,
                        '/SelectTerm!updateSessions.action'),
                    parse=False)

                # Get a list of all the department codes
                department_codes = self.extract_department_codes()
                department_codes = self.extractor.filter_departments(
                    department_codes,
                    departments
                )

                # Create payload to request course list from server.
                # Status codes 'O'/'W'/'C' -- presumably open, waitlist
                # and closed classes; confirm against the portal.
                payload = {
                    'searchCriteria.classStatusCodes': [
                        'O', 'W', 'C'
                    ],
                    '__checkbox_searchCriteria.classStatusCodes': [
                        'O', 'W', 'C'
                    ]
                }

                for department_code in department_codes:

                    if self.verbosity >= 1:
                        print('>>> Parsing courses in',
                              self.departments[department_code])

                    # Construct payload with department code
                    payload.update({
                        'searchCriteria.subjectAreaCodes': department_code
                    })

                    # GET html for department course listings
                    html = self.requester.get(
                        '{}{}'.format(
                            Parser.API_URL,
                            '/SearchClassesExecute!search.action'
                        ),
                        payload
                    )

                    # Parse courses in department
                    self.parse_courses_in_department(html)

                # return to search page for next iteration
                self.requester.get(Parser.API_URL + '/Entry.action',
                                   parse=False)

    def create_course(self):
        """Populate the ingestor from the current course map and ingest it.

        Returns:
            the course model created by the ingestor.
        """
        self.ingestor['school'] = 'vandy'
        self.ingestor['campus'] = 1
        self.ingestor['code'] = self.course.get('code')
        self.ingestor['name'] = self.course.get('name')
        self.ingestor['description'] = self.course.get('description', '')
        self.ingestor['num_credits'] = safe_cast(self.course.get('Hours'),
                                                 float,
                                                 default=0.)
        # BUG FIX: filter() is a lazy iterator in Python 3; materialize
        # the non-empty area names as a list so the ingestor can iterate
        # (and serialize) them safely.
        self.ingestor['areas'] = [
            a for a in self.course.get('Attributes', '').split(',') if a
        ]

        self.ingestor['prerequisites'] = self.course.get('Requirement(s)')
        self.ingestor['department_name'] = self.departments.get(
            self.course.get('department')
        )
        self.ingestor['level'] = '0'

        created_course = self.ingestor.ingest_course()
        return created_course

    @staticmethod
    def is_float(f):
        """Return True if ``f`` can be converted to a float.

        Args:
            f: any value (number, string, None, ...).
        """
        try:
            float(f)
            return True
        # BUG FIX: float('abc') raises ValueError, which the original
        # (TypeError-only) handler let propagate to the caller.
        except (TypeError, ValueError):
            return False

    def create_section(self, created_course):
        """Ingest the current course's section; skip cancelled sections.

        Returns:
            the section model, or None when the section was cancelled.
        """
        if self.course.get('cancelled'):
            # Reset the flag so the next parsed course starts clean.
            self.course['cancelled'] = False
            return None

        self.ingestor['section'] = self.course.get('section')
        self.ingestor['instructors'] = self.course.get('Instructor(s)', '')
        self.ingestor['size'] = int(self.course.get('Class Capacity'))
        self.ingestor['enrolment'] = int(self.course.get('Total Enrolled'))
        return self.ingestor.ingest_section(created_course)

    def create_offerings(self, created_section):
        """Ingest one meeting per scheduled day for the given section."""
        days = self.course.get('days')
        if not days:
            return
        for day in list(days):
            self.ingestor['day'] = day
            self.ingestor['time_start'] = self.course.get('time_start')
            self.ingestor['time_end'] = self.course.get('time_end')
            self.ingestor['location'] = self.course.get('Location')
            self.ingestor.ingest_meeting(created_section)

    def print_course(self):
        """Dump the current course map to stdout for debugging.

        Values that cannot be concatenated/printed are reported to
        stderr instead of aborting the dump.
        """
        for label in self.course:
            try:
                print(label + "::" + self.course[label] + '::')
            # BUG FIX: the original bare `except:` also caught
            # SystemExit/KeyboardInterrupt; narrow to Exception.
            except Exception:
                sys.stderr.write("error: UNICODE ERROR\n")
                print(sys.exc_info()[0])

    def update_current_course(self, label, value):
        """Store ``value.strip()`` under ``label`` in the course map.

        Values that cannot be stripped/stored are logged and skipped
        rather than aborting the parse.
        """
        try:
            self.course[label] = value.strip()
        # BUG FIX: narrow the original bare `except:` (which also caught
        # SystemExit/KeyboardInterrupt) to Exception.
        except Exception:
            print('label:', label, sys.exc_info()[0])
            sys.stderr.write("UNICODE ERROR\n")

    def extract_department_codes(self):
        """Scrape the advanced-search dropdown for department codes.

        Also caches code -> title in ``self.departments``.

        Returns:
            list[str]: department codes in page order.
        """
        # Query Vandy class search website.
        soup = self.requester.get(
            Parser.API_URL + '/SearchClasses!input.action',
            parse=True)

        # Every department option in the advanced-search multiselect.
        entries = soup.find_all(
            id=re.compile("subjAreaMultiSelectOption[0-9]"))

        for entry in entries:
            self.departments[entry['value']] = entry['title']

        return [entry['value'] for entry in entries]

    def parse_courses_in_department(self, html):
        """Parse all courses on a department result page.

        Raises:
            CourseParseError: when the result count hits the server's
                300-record cap, since results would be truncated.
        """
        match = re.search("totalRecords: ([0-9]*),", str(html))
        hits = int(match.group(1)) if match is not None else 0

        # A more targeted search would be needed above the cap.
        if hits == 300:
            raise CourseParseError('vandy num_hits greater than 300')
        self.parse_set_of_courses(html)

    def parse_set_of_courses(self, html):
        """Walk result pages until the last class number stops changing."""
        previous_last = 0
        page = 1

        while True:
            current_last = self.parse_page_of_courses(html)

            # The final page repeats; stop once the last class number
            # no longer advances.
            if current_last == previous_last:
                break

            page += 1
            html = self.requester.get('{}{}{}'.format(
                Parser.API_URL,
                '/SearchClassesExecute!switchPage.action?pageNum=',
                page))
            previous_last = current_last

    def parse_page_of_courses(self, html):
        """Parse every class row on one result page.

        Returns:
            the class number of the last row parsed (0 if none).
        """
        rows = html.find_all('tr', {'class': 'classRow'})

        last_number = 0
        for row in rows:
            # Flag cancelled classes so create_section() skips them.
            if row.find('a', {'class': 'cancelledStatus'}):
                self.course['cancelled'] = True

            last_number = self.parse_course(row)

        return last_number

    def parse_course(self, soup):
        """Fetch and parse the detail panel for one class row.

        Returns:
            str: the class number extracted from the row.
        """
        # The row's onclick handler embeds the class number and term
        # code needed to request the detail panel.
        details = soup.find('td', {'class', 'classSection'})['onclick']

        # FIX: raw string so `\(` is a regex escape rather than an
        # invalid string escape (SyntaxWarning on modern Python).
        search = re.search(
            r"showClassDetailPanel.fire\({classNumber : '([0-9]*)', "
            r"termCode : '([0-9]*)',",
            details)

        course_number, term_code = search.group(1), search.group(2)

        # Base URL to retrieve detailed course info.
        course_details_url = Parser.API_URL \
            + '/GetClassSectionDetail.action'

        # Create payload to request course from server.
        payload = {
            'classNumber': course_number,
            'termCode': term_code
        }

        try:
            self.parse_course_details(self.requester.get(course_details_url,
                                                         payload))

            # Create models
            created_section = self.create_section(self.create_course())
            if created_section:
                self.create_offerings(created_section)

            # Clear course map for next pass
            self.course.clear()

        except ParseError:
            print('invalid course, parse exception')

        return course_number

    def parse_course_details(self, html):
        """Parse a class detail panel into the current course map.

        Raises:
            ParseError: when the course abbreviation does not match the
                expected DEPT-CATALOG-SECTION shape.
        """
        # Extract course name and abbreviation details.
        # FIX: raw strings so regex escapes (`\n`, `\S`) are never
        # (mis)interpreted as string escapes.
        search = re.search(
            r"(.*):.*\n(.*)",
            html.find(id='classSectionDetailDialog').find('h1').text)
        courseName, abbr = search.group(2), search.group(1)

        # Extract department code, catalog ID, and section number from abbr
        title = re.match(r"(\S*)-(\S*)-(\S*)", abbr)

        if not title:
            raise ParseError()

        department_code = title.group(1)
        catalog_id = title.group(2)
        section_number = title.group(3)

        if self.verbosity > 2:
            print('\t-', department_code, catalog_id,
                  section_number.strip(), '-')

        self.update_current_course("name", courseName)
        self.update_current_course("code", department_code + '-' + catalog_id)
        self.update_current_course("department", department_code)
        self.update_current_course("Catalog ID", catalog_id)
        self.update_current_course('section',
                                   '(' + section_number.strip() + ')')

        # in case no description for course
        self.update_current_course('description', '')

        # Deal with course details as subgroups seen on details page
        detail_headers = html.find_all('div', {'class': 'detailHeader'})
        detail_panels = html.find_all('div', {'class': 'detailPanel'})

        # NOTE: there should be equal detail headers and detail panels
        assert(len(detail_headers) == len(detail_panels))

        # Dispatch each panel to the parser for its header type.
        for i in range(len(detail_headers)):

            # Extract header name
            header = detail_headers[i].text.strip()

            # Choose parsing strategy dependent on header
            if header == "Details" or header == "Availability":
                self.parse_labeled_table(detail_panels[i])

            elif header == "Description":
                self.parse_description(detail_panels[i])

            elif header == "Notes":
                self.parse_notes(detail_panels[i])

            elif header == "Meeting Times":
                self.parse_meeting_times(detail_panels[i])

            elif header == "Cross Listings":
                pass

            elif header == "Attributes":
                self.parse_attributes(detail_panels[i])

            elif header == "Ad Hoc Meeting Times":
                pass

    def parse_attributes(self, soup):
        """Join the panel's list items into the course's Attributes field."""
        items = soup.find_all('div', {'class': 'listItem'})
        attributes = ', '.join(item.text.strip() for item in items)
        self.update_current_course("Attributes", attributes)

    def parse_labeled_table(self, soup):
        """Copy each label/value pair of a details table into the course map.

        Multi-line values have their non-empty lines joined with ', '.
        Special-cases the 'Books' link and numeric 'Hours'.
        """
        # Gather all labeled table entries
        labels = soup.find_all('td', {'class' : 'label'})

        for label in labels:

            siblings = label.find_next_siblings()

            # Check if label value exists
            if len(siblings) != 0:

                # Extract pure label from html (drop the trailing
                # character, presumably ':').
                key = label.text[:-1].strip()

                # Extract label's value(s) [deals with multiline multi-values]
                values = [l for l in (line.strip() for line in siblings[0].text.splitlines()) if l]

                # Edge cases
                if key == "Books":
                    # bookURL = re.search("new YAHOO.mis.student.PopUpOpener\('(.*)',", values[0])
                    # values = [bookURL.group(1)]
                    values = ["<long bn url>"]

                elif key == "Hours":
                    # Normalize to a float string; malformed input becomes '0.0'.
                    values[0] = str(safe_cast(values[0], float, default=0.))

                self.update_current_course(key, ', '.join(values))

    def parse_meeting_times(self, soup):
        """Parse the Meeting Times panel into day/time/instructor fields.

        When the panel has no header labels, the day/time fields are
        cleared instead.
        """
        # Gather all labeled table entries
        labels = soup.find_all('th', {'class': 'label'})

        values = []
        if len(labels) > 0:
            # The row after the course header holds the values, one <td>
            # per label.
            values = soup.find('tr', {'class': 'courseHeader'}).find_next_siblings()[0].find_all('td')
        else:

            # Create empty times slots
            self.update_current_course('days', '')
            self.update_current_course('time_start', '')
            self.update_current_course('time_end', '')

        # NOTE: number of labels and values should be the same
        assert(len(labels) == len(values))

        for i in range(len(labels)):
            label = labels[i].text.strip()
            value = values[i].text.strip()
            if len(label) > 0 and len(value) > 0:

                if label == "Instructor(s)":
                    self.update_current_course(label, ', '.join(self.extract_instructors(value)))

                elif label == "Time":
                    self.parse_time_range(value)

                elif label == "Days":
                    self.parse_days(value)

                else:
                    self.update_current_course(label, value)

    def parse_days(self, unformatted_days):
        """Store meeting days, normalizing "TBA"/empty input to ''."""
        days = '' if unformatted_days in ("TBA", "") else unformatted_days
        self.update_current_course("days", days)

    def parse_time_range(self, unformatted_time_range):
        """Parse an "h:mm am - h:mm pm" range into 24h start/end fields.

        "TBA" or empty input clears the day and time fields instead.
        """
        if unformatted_time_range == "TBA" or unformatted_time_range == "":

            # Create empty time slots
            self.update_current_course('days', '')
            self.update_current_course('time_start', '')
            self.update_current_course('time_end', '')
            return

        # FIX: the original "(.*) \- (.*)" used `\-`, an invalid string
        # escape (SyntaxWarning on modern Python); a raw string with a
        # literal '-' matches identically.
        search = re.match(r"(.*) - (.*)", unformatted_time_range)
        if search is not None:
            self.update_current_course(
                'time_start', self.extractor.time_12to24(search.group(1)))
            self.update_current_course(
                'time_end', self.extractor.time_12to24(search.group(2)))
        else:
            print('ERROR: invalid time format', file=sys.stderr)

    def extract_instructors(self, string):
        """Split a multi-line instructor listing into a list of names.

        Strips the " (Primary)" suffix from the primary instructor.

        Args:
            string: newline-separated instructor names.

        Returns:
            list[str]: one entry per input line.
        """
        # FIX: raw string for the regex -- the original's "\(Primary\)"
        # relied on invalid string escapes (SyntaxWarning on modern
        # Python).  Also iterate directly instead of via range(len()).
        primary = re.compile(r"(.*) \(Primary\)")
        instructors = []
        for line in string.splitlines():
            match = primary.match(line)
            instructors.append(match.group(1) if match else line)
        return instructors

    def parse_notes(self, soup):
        """Append the panel's notes text to the course description."""
        stripped = (piece.strip() for piece in soup.text.splitlines())
        notes = ' '.join(l for l in stripped if l).strip()
        existing = self.course.get('description')
        self.update_current_course('description',
                                   existing + '\nNotes: ' + notes)

    def parse_description(self, soup):
        """Store the panel's stripped text as the course description."""
        description = soup.text.strip()
        self.update_current_course('description', description)