Esempio n. 1
0
def get_and_parse_inmate_rows():
    """
    Fetch the page HTML and return the table rows that hold inmate records.

    The HTML text comes from get_html() (data_helper module). It is parsed
    with BeautifulSoup using the lxml parser, and every <tr> inside a
    <table class="tdcj_table"> is selected. The first matched row is
    dropped -- presumably the header row of the table.

    Args:
        none

    Returns:
        <list>: <bs4.Element.Tag> objects, one per remaining <tr>, e.g.:

            <tr>
            <td>999608</td>
            <td align="center"><a href="dr_info/hudsonwilliam.html" title="Offender Information for William Hudson">Offender Information</a></td>
            <td>Hudson</td>
            <td>William</td>
            <td>07/03/1982</td>
            <td align="center">M</td>
            <td>White</td>
            <td>11/16/2017</td>
            <td>Anderson</td>
            <td>11/14/2015</td>
            </tr>
    """
    matched_rows = BeautifulSoup(get_html(), 'lxml').select('table.tdcj_table tr')
    # Skip the first matched <tr>; only the rows after it are returned.
    return matched_rows[1:]
def get_and_parse_inmate_rows():
    """
    Fetch the page HTML via get_html(), parse it, and return its table rows.

    Args:
        none

    Returns:
        <list>: bs4 Tag objects, one for every <tr> in the document except
            the first one (presumably the header row).
    """
    # get_html() returns the page contents as one big text string.
    txt = get_html()
    soup = BeautifulSoup(txt, 'lxml')
    # Select every <tr> in the document and drop the first match.
    records = soup.select('tr')[1:]
    return records
def test_data_helper_get_html():
    """
    Check that data_helper.get_html() returns a str and that the text
    contains the fragments we expect to find on this page.
    """
    page_text = data_helper.get_html()
    assert isinstance(page_text, str)
    # Sanity checks on content: these fragments should appear in the page
    # if the file is the one we expect.
    expected_fragments = (
        '<h1>Death Row Information</h1>',
        'Information for Eric Williams',
    )
    for fragment in expected_fragments:
        assert fragment in page_text
Esempio n. 4
0
def get_and_parse_inmate_rows():
    """
    Fetch the webpage, parse it as HTML, and return the table rows that
    ostensibly describe TX death row inmates.

    The HTML is obtained through data_helper.get_html().

    Args:
        None

    Returns:
        <list>: bs4.Tag objects, one per <tr> found inside a
            <table class="tdcj_table">, minus the first matched row.
    """
    page_source = data_helper.get_html()
    parsed = BeautifulSoup(page_source, 'lxml')
    table_rows = parsed.select('table.tdcj_table tr')
    # The first matched <tr> is skipped (presumably the header row).
    return table_rows[1:]
def get_and_parse_inmate_rows():
    """
    Convenience wrapper: get the page HTML from data_helper.get_html(),
    parse it with BeautifulSoup (lxml parser), and hand back the rows of
    the table with class "tdcj_table".

    Args:
        None

    Returns:
        <list>: bs4.Tag objects of the <tr> type; the first matched row
            is left out (presumably the header row).
    """
    # Per the exercise notes, this is meant to match the function of the
    # same name in checker.py of the txdeathrow_check exercise.
    return BeautifulSoup(
        data_helper.get_html(), 'lxml'
    ).select('table.tdcj_table tr')[1:]
Esempio n. 6
0
def get_and_parse_inmate_rows():
    """
    A convenience function that calls the functions needed to
      fetch the webpage, parse it as HTML, and return a list of bs4 <Tag>
      objects, each one derived from the HTML for a table row
       that ostensibly contains info about a TX death row inmate

    Calls on functionality in the `data_helper.py` script (i.e. get_html())

    Args:
        None

    Returns:
        <list>: A list of bs4.Tag objects, of the <tr> type; every <tr> in
            the document except the first one
    """
    txt = data_helper.get_html()
    soup = BeautifulSoup(txt, 'lxml')
    # select() already returns a list-like result, so it can be sliced
    # directly; no element-by-element copy is needed. The first <tr> is
    # skipped (presumably the header row).
    return soup.select('tr')[1:]
Esempio n. 7
0
from data_helper import get_html
from bs4 import BeautifulSoup
# Module-level setup: these statements run when the module is executed.
# Fetch the page HTML through data_helper.get_html().
txt = get_html()
# Parse the HTML with BeautifulSoup, using the lxml parser.
soup = BeautifulSoup(txt, 'lxml')
# Every <tr> element in the document; no row is skipped here.
rows = soup.select('tr')

def get_and_parse_inmate_rows():
    """
    Uses get_html() -- via data_helper module --
      to get HTML for parsing. Then uses BeautifulSoup
      to parse it. Then uses the .select() method to
      pick the Tag elements that contain inmate info
    Args:
        none
    Returns:
        <list>: a list of <bs4.Element.Tag> objects, each one looking like:
            <tr>
            <td>999608</td>
            <td align="center"><a href="dr_info/hudsonwilliam.html" title="Offender Information for William Hudson">Offender Information</a></td>
            <td>Hudson</td>
            <td>William</td>
            <td>07/03/1982</td>
            <td align="center">M</td>
            <td>White</td>
            <td>11/16/2017</td>
            <td>Anderson</td>
            <td>11/14/2015</td>
            </tr>
    """
    html = get_html()
    parsed = BeautifulSoup(html, 'lxml')
    # Select every <tr> and drop the first one -- presumably the header
    # row, which would not look like the <td>-based record shown above.
    # NOTE(review): confirm the selector against the real page markup.
    return parsed.select('tr')[1:]
Esempio n. 8
0
def get_and_parse_inmate_rows():
    """
    Parse the HTML returned by get_html() and return its table rows.

    Returns:
        <list>: bs4 Tag objects for every <tr> in the document except
            the first one (presumably the header row).
    """
    html_text = get_html()
    document = BeautifulSoup(html_text, 'lxml')
    # Slice off the first matched <tr>; the rest are returned.
    return document.select('tr')[1:]