def prep_para(para):
    """Prepare a paragraph dict for matching and return it.

    The paragraph is mutated in place: its ``'text'`` entry is set from
    ``_para_text_content(para)`` and its ``'next'`` entry is set to a
    zero-argument callable that lazily yields the following paragraph.

    NOTE(review): ``section`` is not a parameter here; it is resolved from
    the surrounding scope when ``next`` is called -- confirm where it is bound.

    Args:
        para: Paragraph dict; must carry an ``'index'`` key by the time its
            ``'next'`` callable is invoked.

    Returns:
        The same ``para`` object, now carrying ``'text'`` and ``'next'``.
    """
    para['text'] = _para_text_content(para)

    def next_para():
        """Return the next prepared, non-empty paragraph, or None at the end."""
        siblings = section['paragraphs']
        # Position of the following paragraph, measured from the index of the
        # section's first paragraph (presumably indices are contiguous --
        # TODO confirm).
        position = para['index'] - siblings[0]['index'] + 1
        if position >= len(siblings):
            return None
        # Prepare a deep copy so the paragraph stored in the section is not
        # the object that gets mutated.
        candidate = prep_para(copy.deepcopy(siblings[position]))
        # Paragraphs that matchers.empty() flags are skipped by delegating to
        # the candidate's own 'next' callable.
        return candidate['next']() if matchers.empty(candidate) else candidate

    para['next'] = next_para
    return para
def prep_para(para):
    """Populate ``para['text']`` and install a lazy ``para['next']`` accessor.

    ``para`` is modified in place and also returned, so the call can be
    chained. ``para['next']()`` looks up the paragraph after this one in
    ``section['paragraphs']``, prepares a deep copy of it, and keeps moving
    forward while ``matchers.empty`` reports the prepared copy as empty.

    NOTE(review): ``section`` comes from the enclosing/global scope rather
    than from an argument -- verify against the caller.

    Args:
        para: Paragraph dict with an ``'index'`` key (read when ``next`` runs).

    Returns:
        The ``para`` dict that was passed in.
    """

    def next_para():
        """Fetch the following non-empty paragraph; None when there is none."""
        all_paras = section['paragraphs']
        own_index = para['index']
        first_index = all_paras[0]['index']
        # Translate the paragraph's index into a list offset, then step one
        # forward.
        offset = own_index - first_index + 1
        if offset < len(all_paras):
            # The section's entry is deep-copied before being prepared, so
            # the copy (not the stored paragraph) receives 'text' and 'next'.
            following = prep_para(copy.deepcopy(all_paras[offset]))
            if matchers.empty(following):
                # Skip the empty paragraph via its own accessor.
                following = following['next']()
            return following
        return None

    para['text'] = _para_text_content(para)
    para['next'] = next_para
    return para
def parse_doc_section(section, dom):
    """Feed each non-empty paragraph of ``section`` to a ``Parser`` and log totals.

    Every paragraph in ``section["paragraphs"]`` gets its ``'text'`` entry set
    from ``_para_text_content``. Paragraphs whose text is falsy are skipped.
    The rest are passed to a ``Parser`` built from ``dom``; a truthy result
    counts as handled, a falsy one (with text still present afterwards) is
    counted as unhandled and dumped to stderr. The two totals are written to
    stderr at the end.

    Args:
        section: Mapping with a ``"paragraphs"`` iterable of paragraph dicts.
        dom: Passed straight through to ``Parser``.

    Returns:
        None. Results are reported on stderr only.
    """
    parser = Parser(dom)
    handled_total = 0
    unhandled_total = 0

    for para in section["paragraphs"]:
        text = para['text'] = _para_text_content(para)
        if not text:
            continue

        if parser(para):
            handled_total += 1
        elif para['text']:
            # 'text' is re-read after the parser call on purpose: the parser
            # receives the paragraph and may have altered it.
            unhandled_total += 1
            print('unhandled para {}:'.format(para['index']), para, '\n',
                  file=sys.stderr)

    print('handled paras: {}'.format(handled_total), file=sys.stderr)
    print('unhandled paras: {}'.format(unhandled_total), file=sys.stderr)