コード例 #1
0
ファイル: scan_cited.py プロジェクト: paper82/patentcite
def process(patnum, doc, ref):
    """Extract and store the text for every location cited in ``ref``.

    Calls ``prepare_pdf_pages`` for the document, parses ``ref`` into a
    mapping of location type -> references, and for each type dispatches
    to the attribute of the same name on ``processor``. Every truthy result
    is handed to ``_store_reference_file``; anything that cannot be
    evaluated, or whose type has no matching processor attribute, is
    reported on stdout.

    Args:
        patnum: Path component placed directly below ``config.data_dir``
            (presumably the patent number -- confirm against callers).
        doc: Path component used both as the sub-directory below
            ``patnum`` and as the base file name inside it.
        ref: Reference string passed to ``parser.parse_locations``.
    """
    prepare_pdf_pages(patnum, doc)
    locations = parser.parse_locations(ref)
    basedir = config.data_dir + patnum + "/" + doc + "/"
    basefile = basedir + doc
    for loc_type in locations:
        # Look the handler up once per type instead of once per reference.
        handler = getattr(processor, loc_type, None)
        if handler is None:
            # The original had a bare string expression here, so this
            # message was silently discarded; actually emit it.
            print('Processor ' + loc_type + ' not implemented.')
            continue
        for reference in locations[loc_type]:
            text = handler(basefile, reference)
            if text:
                _store_reference_file(basedir, reference, text)
            else:
                # Single-argument print(...) behaves the same on Python 2
                # and Python 3.
                print('Could not evaluate ' + ref + ' in ' + basefile)
                print('Processor: ' + str(processor) + ', ' + loc_type)
コード例 #2
0
def _scan_lines_for_references(indices, found, lines):
    """Scan the line segments that start at each index for references.

    Each entry of ``indices`` marks where a segment of ``lines`` begins; the
    segment runs up to (not including) the next entry of ``indices``, or to
    the end of ``lines`` for the last entry. The segment is lower-cased and
    concatenated, the replacements from ``cfgdict.words`` are applied, and
    the parse results are stored in ``found[index]`` under the keys
    ``references``, ``abstract``, ``figures``, ``claims``, ``paragraphs``,
    ``whole`` and ``rawtext``.

    Args:
        indices: Sequence of start positions into ``lines``.
        found: Container indexed by those start positions whose entries
            accept item assignment; it is updated in place, and an entry
            must already exist for every index.
        lines: Sequence of text lines.

    Returns:
        The same ``found`` object that was passed in.
    """
    replacements = cfgdict.words
    last_pos = len(indices) - 1
    for pos, index in enumerate(indices):
        # An explicit bounds check replaces the former bare ``except:``,
        # which also swallowed unrelated errors (even KeyboardInterrupt).
        if pos < last_pos:
            sublines = lines[index:indices[pos + 1]]
        else:
            sublines = lines[index:]
        # join() avoids quadratic string building on long segments.
        subtext = "".join(subline.lower() for subline in sublines)
        # Replacements are applied one after another in iteration order, so
        # a later replacement sees the output of the earlier ones.
        for word in replacements:
            subtext = subtext.replace(word, replacements[word])
        refs = parser.parse_locations(subtext)
        entry = found[index]
        entry["references"] = refs
        entry["abstract"] = parser.parse_for_word(subtext, cfgdict.abstract)
        entry["figures"] = parser.parse_for(subtext, config.figures_parser, 'figures')
        entry["claims"] = parser.parse_for(subtext, config.claims_parser, 'claims')
        entry["paragraphs"] = parser.parse_for(subtext, config.paragraphs_parser, 'paragraphs')
        entry["whole"] = parser.parse_for_word(subtext, cfgdict.whole_document)
        entry["rawtext"] = subtext
    return found