예제 #1
0
def _table2divgroups(elem, table_elem, specmap, omit_whitespace=True):
    '''
    Replace table_elem (a child of elem) with DIV groups built from its cells.

    Every spec in specmap selects an inclusive rectangle of cells
    (rowstart..rowend by colstart..colend) and produces one group element
    whose id is spec.idname and whose class is
    'mwu-elem-table2divgroups-group', followed by spec.classname when that
    is not None.  Each selected cell is deep-copied, stripped of all its
    attributes and retagged as 'div'.  When a spec spans more than one row
    AND more than one column, the cells of each row are additionally wrapped
    in their own container element.

    Groups for which elementempty() is true are dropped.  For the others,
    spec.lastfilter (if truthy) is called with the finished group element
    before it is collected.  Finally all collected groups are placed in a
    wrapper with class 'mwu-elem-table2divgroups', which is handed to
    replace_child(elem, table_elem, wrapper).

    elem            -- element passed to replace_child() as the parent
    table_elem      -- the table element; its tag must be 'table'
    specmap         -- iterable of spec objects providing idname, classname,
                       rowstart, rowend, colstart, colend and lastfilter
    omit_whitespace -- when true, cells for which elementempty() is true are
                       skipped

    Returns None; the result is the in-place replacement done by
    replace_child().  Raises AssertionError when table_elem is not a table
    or a spec has an inverted row/column range.
    '''
    import copy
    assert table_elem.tag == 'table', table_elem.tag
    # presumably a mapping keyed by (row, col) -- it is only queried through
    # .get((ii, jj), None) below; verify against cell_lookup()
    cells = cell_lookup(table_elem)
    groups = []
    for spec in specmap:
        groupid = spec.idname
        classvalue = 'mwu-elem-table2divgroups-group'
        if spec.classname is not None:
            classvalue += ' ' + spec.classname
        group_elem = htmlelem(attrib={
                'class' : classvalue,
                'id'    : groupid,
                })
        assert spec.rowend >= spec.rowstart
        assert spec.colend >= spec.colstart
        # whether to wrap cells from the same TR tag in their own DIV
        wrap_rows = (spec.colend > spec.colstart) and (spec.rowend > spec.rowstart)
        for ii in range(spec.rowstart, spec.rowend + 1):
            cell_elems = []
            for jj in range(spec.colstart, spec.colend + 1):
                td_elem = cells.get((ii, jj), None)
                if td_elem is None:
                    continue # Could be a colspan issue.  Just skip over to next found cell
                if omit_whitespace and elementempty(td_elem):
                    continue # skip over this empty cell
                # Work on a copy so the source table is left untouched until
                # the final replace_child() call.
                cell_elem = copy.deepcopy(td_elem)
                # Drop every attribute in one call rather than deleting keys
                # while iterating over the very mapping being modified.
                cell_elem.attrib.clear()
                cell_elem.tag = 'div'
                cell_elems.append(cell_elem)
            if wrap_rows:
                # NOTE(review): the row wrapper is appended even when every
                # cell of the row was skipped -- confirm that is intended.
                append_elem = htmlelem()
                group_elem.append(append_elem)
            else:
                # append the cells directly to the group, without a per-row wrapper
                append_elem = group_elem
            for cell_elem in cell_elems:
                append_elem.append(cell_elem)
        if not elementempty(group_elem):
            if spec.lastfilter:
                spec.lastfilter(group_elem)
            groups.append(group_elem)
    # table_elem was already dereferenced by the assert at the top; this
    # guard is kept for runs where asserts are stripped (python -O).
    if table_elem is not None:
        groups_elem = htmlelem(attrib={'class' : 'mwu-elem-table2divgroups'})
        for group_elem in groups:
            groups_elem.append(group_elem)
        replace_child(elem, table_elem, groups_elem)
예제 #2
0
def _table2divs(elem, omit_whitespace, marker_base, wrap_rows):
    '''
    helper for some table-to-div filters

    Locates a table (elem itself when its tag is 'table', otherwise the
    first descendant matched by './/table') and replaces it, through
    replace_child(elem, table, container), with a container element whose
    class is marker_base.  When no table is found nothing is replaced.

    Rows are the direct 'tr' children of rowsparent(table); cells are the
    direct 'td' children of each row (other cell tags are not picked up).
    Every kept cell becomes a new element that receives the cell's leading
    text and all of its child elements, with a class attribute made of:
      - rcmarker(row=R, col=C) and rcmarker(col=C)
      - rcmarker(row=R), only when wrap_rows is false
      - 'mwu-td-<name>' for every class name found on the original td
    where R and C are the zero-based positions from enumerate(); C counts
    skipped cells too.

    elem            -- the table itself, or an element containing one
    omit_whitespace -- when true, cells for which elementempty() is true are
                       skipped
    marker_base     -- class of the container and first argument to
                       rcmarkerbase() for every row/cell marker
    wrap_rows       -- when true, the cells of each row are placed in a row
                       element (class rcmarker(row=R)) instead of directly
                       in the container

    Returns None.
    '''
    def rcmarker(**kw):
        return rcmarkerbase(marker_base, **kw)
    container_elem = htmlelem(attrib={'class' : marker_base})
    table_elem = elem if elem.tag == 'table' else elem.find('.//table')
    if table_elem is None:
        return
    for rownum, row in enumerate(rowsparent(table_elem).findall('./tr')):
        # Cells go either into a per-row wrapper or straight into the container.
        if wrap_rows:
            target_elem = htmlelem(attrib={'class' : rcmarker(row=rownum)})
        else:
            target_elem = container_elem
        for colnum, tdelem in enumerate(row.findall('./td')):
            if omit_whitespace and elementempty(tdelem):
                continue # skip over this empty cell
            cell_elem = htmlelem(text=tdelem.text)
            # Iterate over a snapshot of the children: appending a child to
            # cell_elem may detach it from tdelem (lxml reparents on append),
            # and the live element should not be mutated mid-iteration.
            for colchild in list(tdelem):
                cell_elem.append(colchild)
            markers = [
                rcmarker(row=rownum, col=colnum),
                rcmarker(col=colnum),
                ]
            if not wrap_rows:
                # without a row wrapper the cell itself carries the row marker
                markers.append(rcmarker(row=rownum))
            # split() ignores surrounding whitespace and yields nothing for
            # an empty value, so no separate emptiness check is needed
            td_classes = tdelem.attrib.get('class', '').split()
            markers.extend('mwu-td-' + c for c in td_classes)
            cell_elem.attrib['class'] = ' '.join(markers)
            target_elem.append(cell_elem)
        if wrap_rows:
            # NOTE(review): the row wrapper is appended even when every cell
            # of the row was skipped -- confirm that is intended.
            container_elem.append(target_elem)
    # NOTE(review): when elem is itself the table, replace_child() is called
    # with elem as both parent and child -- confirm the helper handles that.
    replace_child(elem, table_elem, container_elem)