def process(patnum, doc, ref):
    """Evaluate the locations referenced by ``ref`` for one document.

    Calls ``prepare_pdf_pages(patnum, doc)`` first, then parses ``ref`` with
    ``parser.parse_locations``.  The parse result is iterated for its keys
    (location types) and indexed by them to obtain the references of each
    type.  For every type that names an attribute of ``processor``, that
    attribute is called as ``handler(basefile, reference)``; a truthy result
    is handed to ``_store_reference_file``, a falsy one is reported on
    stdout.  Types without a matching ``processor`` attribute are reported
    as not implemented.

    Args:
        patnum: Identifier concatenated onto ``config.data_dir`` to build
            the document directory (must be a string).
        doc: Document name; used both as sub-directory and as file stem.
        ref: Reference text passed to ``parser.parse_locations``.

    Returns:
        None.
    """
    prepare_pdf_pages(patnum, doc)
    locations = parser.parse_locations(ref)

    basedir = config.data_dir + patnum + "/" + doc + "/"
    basefile = basedir + doc

    # ``kind`` instead of ``type`` so the builtin is not shadowed.
    for kind in locations:
        if kind not in dir(processor):
            # The original had a bare string expression here, so the
            # message was silently discarded; emit it like the others.
            print('Processor ' + kind + ' not implemented.')
            continue

        handler = getattr(processor, kind)
        for reference in locations[kind]:
            text = handler(basefile, reference)
            if text:
                _store_reference_file(basedir, reference, text)
            else:
                # Single-argument print(...) behaves the same under the
                # Python 2 print statement and the Python 3 function.
                print('Could not evaluate ' + ref + ' in ' + basefile)
                print('Processor: ' + str(processor) + ', ' + kind)
def _scan_lines_for_references(indices, found, lines): """ Scan lines for references. """ for pos, index in enumerate(indices): try: next_index = indices[pos + 1] sublines = lines[index:next_index] except: sublines = lines[index:] subtext = "" for subline in sublines: subtext += subline.lower() for word in cfgdict.words: subtext = subtext.replace(word, cfgdict.words[word]) refs = parser.parse_locations(subtext) found[index]["references"] = refs found[index]["abstract"] = parser.parse_for_word(subtext, cfgdict.abstract) found[index]["figures"] = parser.parse_for(subtext, config.figures_parser, 'figures') found[index]["claims"] = parser.parse_for(subtext, config.claims_parser, 'claims') found[index]["paragraphs"] = parser.parse_for(subtext, config.paragraphs_parser, 'paragraphs') found[index]["whole"] = parser.parse_for_word(subtext, cfgdict.whole_document) found[index]["rawtext"] = subtext return found