コード例 #1
0
    def test_html_table_parser(self):
        """Check column lookups on the 2D grid built from the mock HTML table.

        The mock markup is parsed, its first ``<table>`` is converted with
        ``make2d``, and three columns are fetched by heading and compared
        against the expected cell values.
        """
        from bs4 import BeautifulSoup
        from html_table_parser import parser_functions

        table_tag = BeautifulSoup(mock_html_table(), "html.parser").find('table')
        grid = parser_functions.make2d(table_tag)

        # (heading as queried, expected column values) -- checked in order.
        expectations = (
            # twod_col_data matches headings case-insensitively
            ('first name', ['Eve', 'John', 'Adam', 'Jill']),
            # the first row's last name is 'Eve' because of colspan
            ('lAst naMe', ['Eve', 'Doe', 'Johnson', 'Smith']),
            # the last row's points value is '67' because of rowspan
            ('POINTS', ['94', '80', '67', '67']),
        )
        for heading, expected in expectations:
            self.assertEqual(parser_functions.twod_col_data(grid, heading), expected)
コード例 #2
0
    def test_html_table_parser(self):
        """Verify ``twod_col_data`` against the mock table's expected columns.

        Builds the 2D representation of the first ``<table>`` in the mock
        markup, then looks up three columns by heading and compares each
        with its expected list of cell strings.
        """
        from bs4 import BeautifulSoup as bs
        from html_table_parser import parser_functions as parse

        table = bs(mock_html_table(), "html.parser").find('table')
        grid = parse.make2d(table)

        # heading lookup in twod_col_data is case insensitive
        first_names = parse.twod_col_data(grid, 'first name')
        self.assertEqual(first_names, ['Eve', 'John', 'Adam', 'Jill'])

        # colspan makes the first row's last name 'Eve' as well
        last_names = parse.twod_col_data(grid, 'lAst naMe')
        self.assertEqual(last_names, ['Eve', 'Doe', 'Johnson', 'Smith'])

        # rowspan makes the last row's points '67'
        points = parse.twod_col_data(grid, 'POINTS')
        self.assertEqual(points, ['94', '80', '67', '67'])
コード例 #3
0
ファイル: example.py プロジェクト: ojones/html_table_parser
# Example script: parse the mock HTML table shipped with the test module and
# print it as a 2D array, as single columns, and via make_dict.
import pprint

# `bs` and `parse` are used below; they must be imported here or the script
# fails with NameError as soon as it runs.
from bs4 import BeautifulSoup as bs

from html_table_parser import parser_functions as parse
from html_table_parser.tests import test_html_table_parser as test

__author__ = 'oswaldjones'

# Shared pretty-printer; the wide line limit keeps each table row on one line.
pp = pprint.PrettyPrinter(indent=4, width=120)


if __name__ == '__main__':

    # Parse the mock markup and take the first <table> element.
    soup = bs(test.mock_html_table(), "html.parser")
    test_table = soup.find('table')

    twod_array = parse.make2d(test_table)

    # print 2D array
    pp.pprint(twod_array)

    # print column data by col heading name (case insensitive)
    pp.pprint(parse.twod_col_data(twod_array, 'first name'))
    pp.pprint(parse.twod_col_data(twod_array, 'lAst naMe'))

    # row data begins on first row after col headings
    # so rowstart is 1
    pp.pprint(parse.make_dict(test_table, 1))