Esempio n. 1
0
def populate_csv_file():

    backup(csv_path)

    f_csv = CsvFile(csv_path, 'w')

    seen_courses = set()

    course_tags_soup = BeautifulSoup(open(html_path), 'html.parser')

    for course_tag in course_tags_soup.findAll('a'):

        displayed_course_information = course_tag.contents[0]

        department_and_level_regex = '[A-Zx0-9 \.\-]+'

        if re.match('%s \- ' % department_and_level_regex, displayed_course_information):
            department_and_level, title = displayed_course_information.split(' - ', 1)
        elif re.search(' \- %s($|\s)' % department_and_level_regex, displayed_course_information):
            title, department_and_level = displayed_course_information.rsplit(' - ', 1)
        else:
            title, department_and_level = displayed_course_information, ''

        url = course_tag.get('href')

        if (title, department_and_level) not in seen_courses:
            f_csv.add_row(title, department_and_level, url)

        seen_courses.add((title, department_and_level))

    f_csv.close()