-
Notifications
You must be signed in to change notification settings - Fork 0
/
soup_operations_functions.py
30 lines (25 loc) · 1.03 KB
/
soup_operations_functions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# Functions that give operational support to BeautifulSoup workflows.
from bs4 import BeautifulSoup
import urllib.request
import soup_info_functions
# Takes a URL and returns a BeautifulSoup object.
def get_html_soup(url):
    """Fetch *url* and return its content parsed as a BeautifulSoup object.

    Args:
        url: Anything accepted by ``urllib.request.urlopen``.

    Returns:
        A ``BeautifulSoup`` object built from the response body with the
        ``'html.parser'`` parser.
    """
    # Read the full body while the connection is open; parsing works on the
    # in-memory bytes afterwards.
    with urllib.request.urlopen(url) as page:
        raw_markup = page.read()
    return BeautifulSoup(raw_markup, 'html.parser')
# TODO: improve to include a/an and male/female (director/composer are already printed below).
def print_roles(soup):
    """Print whether the page in *soup* describes an actor, director, composer.

    One line is printed per role in the form ``<Role>: <value>``, where the
    value is the string form of whatever the matching
    ``soup_info_functions.is_a*`` helper returns.

    Args:
        soup: Parsed page, passed unchanged to the ``soup_info_functions``
            helpers.
    """
    # No person-name lookup here: its result was never used in the output.
    print('Actor: ' + str(soup_info_functions.is_an_actor(soup)))
    print('Director: ' + str(soup_info_functions.is_a_director(soup)))
    print('Composer: ' + str(soup_info_functions.is_a_composer(soup)))
def write_html(soup, output_file_name):
    """Write the prettified markup of *soup* to a file.

    Args:
        soup: Object whose ``prettify()`` method returns the markup as a
            string (e.g. a BeautifulSoup object).
        output_file_name: Path of the file to create or overwrite.
    """
    pretty_html = soup.prettify()
    # Scraped pages routinely contain non-ASCII characters. An explicit
    # encoding avoids UnicodeEncodeError on platforms whose locale encoding
    # cannot represent them. Plain 'w' suffices because the file is never
    # read back here.
    with open(output_file_name, 'w', encoding='utf-8') as file:
        file.write(pretty_html)
def write_a_tags(soup, output_file_name):
    """Write every ``<a>`` element found in *soup* to a file, one per line.

    Args:
        soup: Object whose ``find_all('a')`` returns an iterable of elements
            (e.g. a BeautifulSoup object); each element is written via
            ``str()``.
        output_file_name: Path of the file to create or overwrite.
    """
    # Collect the links before opening the file so a failure while searching
    # the document does not truncate an existing output file.
    links = soup.find_all('a')
    # Explicit UTF-8: link text and hrefs may contain characters the
    # platform's default encoding cannot represent.
    with open(output_file_name, 'w', encoding='utf-8') as file:
        file.writelines(str(link) + '\n' for link in links)