Esempi in Python per HTMLTableSet

Linguaggio di programmazione: Python

Spazio dei nomi/nome del pacchetto: messytables

Classe/tipologia: HTMLTableSet

Esempi su hotexamples.com: 7

HTMLTableSet in Python: 7 esempi trovati. Questi sono i migliori esempi reali in Python per messytables.HTMLTableSet, estratti da progetti open source. Li puoi valutare, per aiutarci a migliorare la qualità dei nostri esempi.

Metodi utilizzati di frequente

Mostra Nascondi

HTMLTableSet (7)

Metodi utilizzati di frequente

HTMLTableSet (7)

Esempio n. 1

Mostra file

 def test_invisible_text_html(self):
     # Parse the 'invisible_text.html' fixture and look only at its
     # first table.
     first_table = HTMLTableSet(horror_fobj('invisible_text.html')).tables[0]

     # The table is expected to yield exactly four rows.
     assert_equal(4, len(list(first_table)))

     # Cell 5 of the second sampled row must read '1 July 1879' once
     # surrounding whitespace is stripped.
     sampled_rows = list(first_table.sample)
     date_cell = sampled_rows[1][5]
     assert_equal(date_cell.value.strip(), '1 July 1879')

Esempio n. 2

Mostra file

 def test_read_real_html(self):
     # Parse the 'html.html' fixture and look only at its first table.
     first_table = HTMLTableSet(horror_fobj('html.html')).tables[0]

     # The table is expected to yield exactly 200 rows.
     assert_equal(200, len(list(first_table)))

     # Spot-check selected cells of the first sampled row
     # (column index -> expected text after stripping whitespace).
     first_row = list(first_table.sample)[0]
     expected_cells = ((0, 'HDI Rank'), (1, 'Country'), (4, '2010'))
     for column, expected in expected_cells:
         assert_equal(first_row[column].value.strip(), expected)

Esempio n. 3

Mostra file

def any_tableset(fileobj, mimetype=None, extension=None):
    """Return the TableSet that best matches ``fileobj``.

    The format is chosen from the MIME type (e.g. ``mimetype='text/csv'``)
    and/or the file extension (e.g. ``extension='tsv'``, compared
    case-insensitively). Formats are tried in a fixed order -- ZIP, CSV,
    TSV, XLS, XLSX, HTML -- and the first one whose recognized MIME types
    contain ``mimetype`` or whose recognized extensions contain
    ``extension`` wins. Consult the source for the recognized values.

    When neither hint is given, the MIME type is guessed with the
    ``magic`` library from the first chunk of the stream
    (``fileobj.read(1024)``). That guess is often wrong, so pass a hint
    whenever one is available.

    :param fileobj: file-like object holding the table data.
    :param mimetype: optional MIME type of the data.
    :param extension: optional file extension without a leading dot
        (e.g. ``'csv'``).
    :returns: a ZIPTableSet, CSVTableSet, XLSTableSet, XLSXTableSet or
        HTMLTableSet constructed from ``fileobj``.
    :raises ValueError: if neither the MIME type nor the extension
        identifies a supported format.
    """
    # Only sniff the content when the caller has offered no clue at all:
    # the auto-detection routine is pretty poor, so explicit hints win.
    if mimetype is None and extension is None:
        import magic
        # Since we need to peek at the start of the stream, make sure we
        # can seek back afterwards (seekable_stream wraps streams that
        # cannot do so themselves).
        fileobj = messytables.seekable_stream(fileobj)
        header = fileobj.read(1024)
        mimetype = magic.from_buffer(header, mime=True)
        fileobj.seek(0)

    # Normalise the extension once; a missing or empty one never matches.
    ext = extension.lower() if extension else None

    # ZIP must be checked first: for an archive, the extension may describe
    # the inner files rather than the container itself.
    if (mimetype in ('application/x-zip-compressed', 'application/zip')
            or ext == 'zip'):
        return ZIPTableSet(fileobj)

    if (mimetype in ('text/csv', 'text/comma-separated-values')
            or ext == 'csv'):
        return CSVTableSet(fileobj)  # guess delimiter
    if (mimetype in ('text/tsv', 'text/tab-separated-values')
            or ext == 'tsv'):
        return CSVTableSet(fileobj, delimiter='\t')
    if (mimetype in ('application/ms-excel', 'application/vnd.ms-excel',
                     'application/xls')
            or ext == 'xls'):
        return XLSTableSet(fileobj)
    if (mimetype in (
            'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',)
            or ext == 'xlsx'):
        return XLSXTableSet(fileobj)
    if mimetype in ('text/html',) or ext in ('htm', 'html'):
        return HTMLTableSet(fileobj)

    # Nothing matched: report the most specific hint we were given.
    if mimetype:
        raise ValueError("Unrecognized MIME type: {mimetype}".format(
            mimetype=mimetype))
    if extension:
        # Single-line message; the previous triple-quoted literal leaked a
        # newline and source indentation into the error text.
        raise ValueError(
            "Could not determine MIME type and unrecognized extension: "
            "{extension}".format(extension=extension))
    raise ValueError("Could not determine MIME type and no extension given.")

Esempio n. 4

Mostra file

    def test_that_inner_table_contains_data(self):
        # Index every table parsed from 'complex.html' by its name.
        parsed = HTMLTableSet(horror_fobj('complex.html'))
        by_name = dict((tbl.name, tbl) for tbl in parsed.tables)

        # Flatten the table named 'Table 1 of 2' into a row-major list of
        # cell values and compare against the expected contents.
        flattened = [
            cell.value
            for row in by_name['Table 1 of 2']
            for cell in row
        ]
        assert_equal(['head', 'body', 'foot'], flattened)

Esempio n. 5

Mostra file

    def test_that_outer_table_contains_nothing(self):
        # Index every table parsed from 'complex.html' by its name.
        parsed = HTMLTableSet(horror_fobj('complex.html'))
        by_name = dict((tbl.name, tbl) for tbl in parsed.tables)

        # The table named 'Table 2 of 2' should carry no meaningful data
        # of its own: a single row holding a single cell.
        rows = list(by_name['Table 2 of 2'])
        assert_equal(len(rows), 1)
        only_row = rows[0]
        assert_equal(len(only_row), 1)

        # With spaces and newlines removed, that lone cell's text must be
        # exactly "headfootbody".
        raw_text = only_row[0].value
        squeezed = raw_text.replace(" ", "").replace("\n", "")
        assert_equal(squeezed, "headfootbody")

Esempio n. 6

Mostra file

File: test_read.py Progetto: scraperdragon/messytables

    def test_read_span_html(self):
        # Parse the 'rowcolspan.html' fixture and take its first table.
        first_table = HTMLTableSet(horror_fobj('rowcolspan.html')).tables[0]

        # Map (column, row) coordinates to the value of the cell found there.
        grid = dict(
            ((col, line), cell.value)
            for line, row in enumerate(first_table)
            for col, cell in enumerate(row)
        )

        # Spot checks: (column, row) -> expected cell value.
        expected_cells = {
            (0, 0): '05',
            (0, 2): '25',
            (0, 3): '',
            (1, 3): '36',
            (1, 6): '66',
            (4, 7): '79',
            (4, 8): '89',
        }

        for coords, expected in expected_cells.items():
            assert_equal(grid[coords], expected)

Esempio n. 7

Mostra file

 def test_html_table_name(self):
     # Parse the 'html.html' fixture.
     table_set = HTMLTableSet(horror_fobj('html.html'))

     # The first three tables must be named by position, in order.
     # `.tables` is read anew for every check, as in the original test.
     expected_names = ('Table 1 of 3', 'Table 2 of 3', 'Table 3 of 3')
     for position, expected in enumerate(expected_names):
         assert_equal(expected, table_set.tables[position].name)