예제 #1
0
def _split(inputfile, outputdir):
    """Split the ``<section>`` elements of an HTML file into separate slides.

    The file is parsed with PyQuery and every ``<section>`` is visited:

    * A section carrying the ``stack`` class gets its own numbered
      sub-directory (``01``, ``02``, ...) under ``outputdir``; each
      ``<section>`` found inside it is handed to ``_dump_slide`` with that
      sub-directory as the destination.
    * Any other section is handed to ``_dump_slide`` with ``outputdir`` as
      the destination, unless its parent is a ``stack`` section (those were
      already handled through the stack branch).

    The slide counter is shared by both branches, so slide numbers keep
    increasing across stacks rather than restarting inside each one.

    :param inputfile: path of the HTML file to read.
    :param outputdir: directory receiving the slides; created when missing.
    """
    # Context manager so the handle is released even if read() raises.
    with open(inputfile, 'r') as source:
        html = source.read()

    if not os.path.isdir(outputdir):
        os.mkdir(outputdir)

    idx_slide = 0
    idx_section = 0

    parsed = PyQuery(html)

    for section in parsed('section'):
        slide = PyQuery(section)
        if slide.has_class('stack'):
            idx_section += 1
            stack_path = os.path.join(outputdir, '%02d' % idx_section)
            # Same tolerance as for outputdir: re-running into an existing
            # output tree must not fail on an already-created stack folder.
            if not os.path.isdir(stack_path):
                os.mkdir(stack_path)
            # The stack's inner HTML is re-parsed so that only the sections
            # nested in this stack are selected.
            # NOTE(review): sub_slide is whatever iterating a PyQuery yields,
            # while the other branch passes a PyQuery wrapper -- confirm that
            # _dump_slide accepts both.
            for sub_slide in PyQuery(slide.html())('section'):
                idx_slide += 1
                _dump_slide(sub_slide, idx_slide, stack_path)
        elif not slide.parent().has_class('stack'):
            # Stand-alone slide: not a stack and not a direct child of one.
            idx_slide += 1
            _dump_slide(slide, idx_slide, outputdir)
예제 #2
0
파일: parsers.py 프로젝트: fjork3/dbtruck
    def find_ideal_tables(self, tables):
        """Return the entries of ``tables`` that are not nested in another entry.

        For every entry, the chain formed by the entry itself and its
        successive ``parent()`` nodes is walked upwards; if any node on that
        chain compares equal to a different entry of ``tables``, the entry is
        considered nested and is dropped from the result.

        :param tables: sequence of elements that PyQuery can wrap.
        :returns: a new list with the non-nested entries, in their original
            order; an empty list when ``pyquery`` cannot be imported.
        """
        try:
            from pyquery import PyQuery
        except ImportError:
            # Only a missing dependency is tolerated here; a bare ``except``
            # would also swallow KeyboardInterrupt and SystemExit.
            # ``write`` rather than the ``print >>`` chevron form, which
            # raises TypeError on Python 3.
            sys.stderr.write("could not import pyquery\n")
            return []

        rm = []
        for table in tables:
            found = False
            for t2 in tables:
                if table == t2:
                    continue
                t2 = PyQuery(t2)
                _t = PyQuery(table)
                # Climb from the table towards the root; parent() of the
                # topmost node yields an empty selection, ending the loop.
                while len(_t):
                    if _t == t2:
                        found = True
                        break
                    _t = _t.parent()
                if found:
                    # One enclosing entry is enough; skip the remaining ones.
                    break
            if found:
                rm.append(table)
        return [table for table in tables if table not in rm]