예제 #1
0
def populate_csv_file():
    """Rebuild the course CSV from the anchor tags of the HTML at ``html_path``.

    Calls ``backup(csv_path)`` first, then opens ``CsvFile(csv_path, 'w')``
    and adds one ``(title, department_and_level, url)`` row for each distinct
    ``(title, department_and_level)`` pair found among the ``<a>`` tags.

    The first child of each anchor is split on ``' - '``:

    * ``"<code> - <title>"`` when the text starts with a code-like prefix,
    * ``"<title> - <code>"`` when a code-like suffix follows the last
      separator,
    * otherwise the whole text is the title and the code is ``''``.

    Only the first occurrence of a pair is written; later duplicates (even
    with a different ``href``) are dropped.
    """
    backup(csv_path)

    # Raw strings keep the backslashes literal without relying on Python's
    # tolerance for invalid escape sequences; the patterns are unchanged.
    department_and_level_regex = r'[A-Zx0-9 \.\-]+'
    # Compiled once here instead of being rebuilt for every anchor.
    code_first_pattern = re.compile(r'%s \- ' % department_and_level_regex)
    code_last_pattern = re.compile(r' \- %s($|\s)' % department_and_level_regex)

    f_csv = CsvFile(csv_path, 'w')
    try:
        # The soup reads the whole handle while it is constructed, so the
        # file can be closed before the tags are walked.
        with open(html_path) as html_file:
            course_tags_soup = BeautifulSoup(html_file, 'html.parser')

        seen_courses = set()

        for course_tag in course_tags_soup.findAll('a'):
            # An anchor without children (e.g. <a name="top"></a>) has no
            # text to parse; skip it rather than fail on contents[0].
            if not course_tag.contents:
                continue

            displayed_course_information = course_tag.contents[0]

            if code_first_pattern.match(displayed_course_information):
                department_and_level, title = displayed_course_information.split(' - ', 1)
            elif code_last_pattern.search(displayed_course_information):
                title, department_and_level = displayed_course_information.rsplit(' - ', 1)
            else:
                title, department_and_level = displayed_course_information, ''

            course_key = (title, department_and_level)
            if course_key in seen_courses:
                continue
            seen_courses.add(course_key)

            # href may be absent, in which case get() yields None.
            f_csv.add_row(title, department_and_level, course_tag.get('href'))
    finally:
        # Close the CSV even if reading or parsing the HTML fails.
        f_csv.close()
예제 #2
0
    def save_as_csv(self, url, file_path):
        """Convert the saved countries HTML table into a CSV file.

        Parses ``./data/table.html``, looks up the element with id
        ``main_table_countries_today``, registers the text of its ``<th>``
        cells (each passed through the CsvFile object's ``clean``) as columns,
        adds one row per ``<tr>`` holding the text of that row's ``<td>``
        cells, and saves the result to ``./data/abc.csv``.

        Args:
            url: Not used by the current implementation.
            file_path: Not used by the current implementation.
        """
        # NOTE(review): url and file_path are ignored and every path below is
        # hard-coded; presumably file_path was meant to be the save target -
        # confirm against callers before wiring it in.
        csv_file = CsvFile('./data/test.csv')

        html = './data/table.html'

        # The soup consumes the handle during construction, so the file is
        # closed as soon as parsing is done.
        with open(html, "r") as f:
            soup = BeautifulSoup(f, 'html.parser')

        # find() returns None when no element carries this id; the findAll
        # calls below would then raise AttributeError.
        table = soup.find(id="main_table_countries_today")

        # Header: one cleaned column name per <th>.
        csv_file.add_columns(
            [csv_file.clean(header_cell.text) for header_cell in table.findAll('th')]
        )

        # Body: one row per <tr>. A <tr> without <td> cells (such as the
        # header row) still contributes an empty row, as before.
        for table_row in table.findAll('tr'):
            csv_file.add_row([cell.text for cell in table_row.findAll('td')])

        csv_file.save('./data/abc.csv')