def get_and_parse_inmate_rows():
    """
    Fetch the page HTML with get_html(), parse it with BeautifulSoup
    (lxml parser), and pick out the table rows that hold inmate info.

    Every <tr> under `table.tdcj_table` is selected and the first
    matched row is dropped (presumably the header row).

    Args:
        none

    Returns:
        <list>: a list of <bs4.Element.Tag> objects, each one looking like:

            <tr>
              <td>999608</td>
              <td align="center"><a href="dr_info/hudsonwilliam.html" title="Offender Information for William Hudson">Offender Information</a></td>
              <td>Hudson</td>
              <td>William</td>
              <td>07/03/1982</td>
              <td align="center">M</td>
              <td>White</td>
              <td>11/16/2017</td>
              <td>Anderson</td>
              <td>11/14/2015</td>
            </tr>
    """
    parsed_page = BeautifulSoup(get_html(), 'lxml')
    table_rows = parsed_page.select('table.tdcj_table tr')
    # Skip the first matched row; only the rows after it are returned.
    return table_rows[1:]
def get_and_parse_inmate_rows():
    """
    Parse the HTML returned by get_html() and return its table rows.

    Args:
        none

    Returns:
        <list>: bs4 Tag objects, one per <tr> found in the parsed
        document, with the first matched <tr> left out.
    """
    # str: the contents of the file at DATA_FILEPATH as a big text string
    txt = get_html()
    soup = BeautifulSoup(txt, 'lxml')
    # NOTE(review): 'tr' matches rows anywhere in the document, not just
    # inside one particular table -- confirm the page holds a single table.
    return soup.select('tr')[1:]
def test_data_helper_get_html():
    """
    Check that data_helper.get_html() hands back a str object whose
    content looks like the page we expect.
    """
    html = data_helper.get_html()
    assert isinstance(html, str)

    # Sanity checks on the content: each of these snippets must appear
    # somewhere in the returned text.
    expected_snippets = (
        '<h1>Death Row Information</h1>',
        'Information for Eric Williams',
    )
    for snippet in expected_snippets:
        assert snippet in html
def get_and_parse_inmate_rows():
    """
    Convenience wrapper: fetch the webpage text via data_helper.get_html(),
    parse it as HTML, and return the bs4 <Tag> objects for the table rows
    that ostensibly contain info about TX death row inmates.

    Rows are selected with the CSS selector `table.tdcj_table tr`; the
    first matched row is skipped (presumably the header row).

    Args:
        None

    Returns:
        <list>: A list of bs4.Tag objects
    """
    page_text = data_helper.get_html()
    parsed_page = BeautifulSoup(page_text, 'lxml')
    matched_rows = parsed_page.select('table.tdcj_table tr')
    return matched_rows[1:]
def get_and_parse_inmate_rows():
    """
    Fetch the webpage text through data_helper.get_html(), parse it as
    HTML, and return the bs4 <Tag> objects for the table rows that
    ostensibly contain info about TX death row inmates.

    Args:
        None

    Returns:
        <list>: A list of bs4.Tag objects, of the <tr> type
    """
    # Parse with the lxml parser, select every <tr> under
    # table.tdcj_table, then drop the first matched row.
    return BeautifulSoup(data_helper.get_html(), 'lxml').select(
        'table.tdcj_table tr'
    )[1:]
def get_and_parse_inmate_rows():
    """
    A convenience function that fetches the webpage text via
    data_helper.get_html(), parses it as HTML, and returns a list of
    bs4 <Tag> objects, each one derived from the HTML for a table row
    that ostensibly contains info about a TX death row inmate.

    Args:
        None

    Returns:
        <list>: A list of bs4.Tag objects, of the <tr> type. Every <tr>
        in the parsed document is matched; the first match is left out.
    """
    txt = data_helper.get_html()
    soup = BeautifulSoup(txt, 'lxml')
    # Slicing the select() result already produces a new list, so no
    # element-by-element copy is needed before dropping the first row.
    return soup.select('tr')[1:]
from data_helper import get_html
from bs4 import BeautifulSoup

# These statements run at import time. They are kept so that the
# module-level names `txt`, `soup` and `rows` remain available.
txt = get_html()
soup = BeautifulSoup(txt, 'lxml')
rows = soup.select('tr')


def get_and_parse_inmate_rows():
    """
    Uses get_html() -- via data_helper module -- to get HTML for parsing.
    Then uses BeautifulSoup to parse it.
    Then uses the .select() method to pick the Tag elements that
    contain inmate info

    Args:
        none

    Returns:
        <list>: a list of <bs4.Element.Tag> objects, each one looking like:

            <tr>
              <td>999608</td>
              <td align="center"><a href="dr_info/hudsonwilliam.html" title="Offender Information for William Hudson">Offender Information</a></td>
              <td>Hudson</td>
              <td>William</td>
              <td>07/03/1982</td>
              <td align="center">M</td>
              <td>White</td>
              <td>11/16/2017</td>
              <td>Anderson</td>
              <td>11/14/2015</td>
            </tr>
    """
    # Fetch and parse on every call so the function does not depend on
    # the module-level state above.
    parsed_page = BeautifulSoup(get_html(), 'lxml')
    # Drop the first matched <tr> (presumably the header row -- TODO
    # confirm) so that only the rows after it are returned.
    return parsed_page.select('tr')[1:]
def get_and_parse_inmate_rows():
    """
    Parse the HTML returned by get_html() with BeautifulSoup (lxml
    parser) and return every matched <tr> Tag except the first one.

    Args:
        none

    Returns:
        <list>: bs4 Tag objects for the <tr> elements after the first.
    """
    html = get_html()
    all_rows = BeautifulSoup(html, 'lxml').select('tr')
    return all_rows[1:]