Python HTMLTableSetの例

プログラミング言語: Python

名前空間/パッケージ名: messytables

クラス/型: HTMLTableSet

hotexamples.comのコード掲載数: 7

Python HTMLTableSet - 7件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのmessytables.HTMLTableSetの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

よく使われるメソッド

表示 / 非表示

HTMLTableSet(7)

よく使われるメソッド

HTMLTableSet (7)

コード例 #1

ファイルを表示

 def test_invisible_text_html(self):
     """Check row count and one sampled cell of 'invisible_text.html'.

     The first table must iterate to 4 rows, and cell 5 of the second
     sampled row must read '1 July 1879' once surrounding whitespace
     is stripped.
     """
     table_set = HTMLTableSet(horror_fobj('invisible_text.html'))
     first_table = table_set.tables[0]
     row_count = len(list(first_table))
     assert_equal(4, row_count)
     sampled_rows = list(first_table.sample)
     second_row = sampled_rows[1]
     assert_equal(second_row[5].value.strip(), '1 July 1879')

コード例 #2

ファイルを表示

 def test_read_real_html(self):
     """Check row count and first sampled row of 'html.html'.

     The first table must iterate to 200 rows; selected cells of the
     first sampled row are compared after stripping whitespace.
     """
     first_table = HTMLTableSet(horror_fobj('html.html')).tables[0]
     assert_equal(200, len(list(first_table)))
     first_row = list(first_table.sample)[0]
     # (column index, expected stripped cell text), checked in order.
     expected_cells = ((0, 'HDI Rank'), (1, 'Country'), (4, '2010'))
     for column, text in expected_cells:
         assert_equal(first_row[column].value.strip(), text)

コード例 #3

ファイルを表示

def any_tableset(fileobj, mimetype=None, extension=None):
    """Return the TableSet matching a MIME type or file extension.

    The format is chosen from ``mimetype`` (e.g. mimetype='text/csv') or
    from ``extension`` (e.g. extension='tsv', compared case-insensitively).
    When neither is given, the MIME type is auto-detected with the magic
    library from the first 1024 bytes of the stream; that detection is
    often wrong, so pass an explicit hint whenever possible. Consult the
    source for the recognized MIME types and file extensions.

    Raises ValueError when neither the MIME type nor the extension is
    recognized. Errors raised by the selected TableSet constructor are
    not handled here.
    """
    # Auto-detect only if the caller has offered no clue. (Because the
    # auto-detection routine is pretty poor.)
    if mimetype is None and extension is None:
        import magic
        # We need to peek at the start of the stream, so first obtain a
        # stream we can rewind afterwards.
        fileobj = messytables.seekable_stream(fileobj)
        header = fileobj.read(1024)
        mimetype = magic.from_buffer(header, mime=True)
        fileobj.seek(0)

    # Normalise once; an empty or missing extension never matches.
    ext = extension.lower() if extension else None

    # Check ZIP first: for an archive the extension describes the content
    # type of the inner files, so it must not be matched against the
    # other formats before we know this is not a ZIP file.
    if (mimetype in ('application/x-zip-compressed', 'application/zip')
            or ext == 'zip'):
        return ZIPTableSet(fileobj)

    if (mimetype in ('text/csv', 'text/comma-separated-values')
            or ext == 'csv'):
        return CSVTableSet(fileobj)  # guess delimiter
    if (mimetype in ('text/tsv', 'text/tab-separated-values')
            or ext == 'tsv'):
        return CSVTableSet(fileobj, delimiter='\t')
    if (mimetype in ('application/ms-excel', 'application/vnd.ms-excel',
                     'application/xls')
            or ext == 'xls'):
        return XLSTableSet(fileobj)
    if (mimetype in (
            'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',)
            or ext == 'xlsx'):
        return XLSXTableSet(fileobj)
    if mimetype in ('text/html',) or ext in ('htm', 'html'):
        return HTMLTableSet(fileobj)

    # Nothing matched: report the most specific hint we were given.
    if mimetype:
        raise ValueError("Unrecognized MIME type: {mimetype}".format(
            mimetype=mimetype))
    if extension:
        # Single-line message; report the extension exactly as passed in.
        raise ValueError(
            "Could not determine MIME type and unrecognized extension: "
            "{extension}".format(extension=extension))
    raise ValueError("Could not determine MIME type and no extension given.")

コード例 #4

ファイルを表示

    def test_that_inner_table_contains_data(self):
        """Check the cell values of 'Table 1 of 2' in 'complex.html'.

        Flattening that table row by row, cell by cell, must yield
        exactly 'head', 'body', 'foot'.
        """
        table_set = HTMLTableSet(horror_fobj('complex.html'))
        # Index the tables by name so the one under test can be looked up.
        by_name = {tbl.name: tbl for tbl in table_set.tables}

        flattened = [
            cell.value
            for row in by_name['Table 1 of 2']
            for cell in row
        ]
        assert_equal(['head', 'body', 'foot'], flattened)

コード例 #5

ファイルを表示

    def test_that_outer_table_contains_nothing(self):
        """Check that 'Table 2 of 2' in 'complex.html' is a single cell.

        The table must have one row with one cell whose text, with
        spaces and newlines removed, equals "headfootbody".
        """
        table_set = HTMLTableSet(horror_fobj('complex.html'))
        by_name = {tbl.name: tbl for tbl in table_set.tables}

        # The outer table should contain no meaningful data of its own.
        rows = list(by_name['Table 2 of 2'])
        assert_equal(len(rows), 1)
        only_row = rows[0]
        assert_equal(len(only_row), 1)
        squashed = only_row[0].value.replace(" ", "").replace("\n", "")
        assert_equal(squashed, "headfootbody")

コード例 #6

ファイルを表示

ファイル: test_read.py プロジェクト: scraperdragon/messytables

    def test_read_span_html(self):
        """Check selected cells of the first table in 'rowcolspan.html'.

        Cell values are collected into a mapping keyed by (x, y), where
        x is the cell's index within its row and y is the row's index,
        and a handful of coordinates are compared to expected values.
        """
        first_table = HTMLTableSet(horror_fobj('rowcolspan.html')).tables[0]

        grid = {
            (x, y): cell.value
            for y, row in enumerate(first_table)
            for x, cell in enumerate(row)
        }

        expected = {
            (0, 0): '05',
            (0, 2): '25',
            (0, 3): '',
            (1, 3): '36',
            (1, 6): '66',
            (4, 7): '79',
            (4, 8): '89',
        }

        for coords, value in expected.items():
            assert_equal(grid[coords], value)

コード例 #7

ファイルを表示

 def test_html_table_name(self):
     """Check the names of the first three tables in 'html.html'."""
     table_set = HTMLTableSet(horror_fobj('html.html'))
     expected_names = ('Table 1 of 3', 'Table 2 of 3', 'Table 3 of 3')
     # Look each table up by position, in order, as separate accesses.
     for position, expected in enumerate(expected_names):
         assert_equal(expected, table_set.tables[position].name)