Example #1
0
def parse():
    """
    Parses the data directory for citations.

    For every entry of every search report, the entry's ``patnum_root`` is
    used to build ``<data_dir>/<root>/<root>.export.csv`` and that path is
    handed to ``parse_csv``.
    """
    for report in parser.search_report_data():
        for entry in report:
            root = entry['patnum_root']
            # Same path the plain '+' concatenation would produce.
            csv_path = '/'.join([config.data_dir, root, root + '.export.csv'])
            parse_csv(csv_path)
Example #2
0
def lines(patnum, pdf):
    """
    Build the list of reference lines for one search report.

    ``pdf`` is turned into the report's text filename by replacing ".pdf"
    with ".txt"; the report data is then loaded from
    ``config.data_dir + patnum``. Each entry contributes one
    "country.patnum.kindcode" line (with an "XML available" marker when
    ``_has_xml`` says so), followed by the entry's refs.

    Returns a dict with the keys ``references`` and ``patnum``.
    """
    report_txt = pdf.replace(".pdf", ".txt")
    reports = parser.search_report_data(config.data_dir + patnum, report_txt)

    collected = []
    for report in reports:
        for entry in report:
            doc_id = entry['country'] + '.' + entry['patnum']
            label = doc_id + '.' + entry['kindcode']

            if _has_xml(patnum, doc_id):
                label += ' <span class="xml-available">(XML available)</span>'

            collected.append(label)
            collected.extend(entry['refs'])

    return {'references': collected, 'patnum': patnum}
Example #3
0
def parse(epo = False, bz2 = False, clef=False):
    """
    Download every cited patent listed in the search report data.

    The download routine is chosen from the flags with the priority
    bz2 > epo > clef; when no flag is set, ``download_from_marec`` is used.
    Each routine is called with (patnum_root, "country.patnum").
    """
    reports = parser.search_report_data()

    # The flags do not change while iterating, so pick the routine once.
    if bz2:
        fetch = download_from_bz2_marec
    elif epo:
        fetch = download_from_epo
    elif clef:
        fetch = download_from_clef
    else:
        fetch = download_from_marec

    for report in reports:
        for entry in report:
            cited = entry['country'] + '.' + entry['patnum']
            fetch(entry['patnum_root'], cited)
Example #4
0
def scan(restrict, restrict_cited):
    """
    Scan data dir for search reports and parses them for
    citations.
    """
    data = parser.search_report_data()

    for report in data:
        for entry in report:
            refs = entry['refs']
            doc = entry['country'] + "." + entry['patnum']
            patnum = entry['patnum_root']
            if restrict and patnum != restrict:
                continue
            for ref in refs:
                if ref:
                    if restrict_cited and doc != restrict_cited:
                        continue
                    print "processing ", patnum, doc, ref
                    process(patnum, doc, ref)
Example #5
0
            download.get_pdf_page(country, patnum, kindcode, page + offset, target_folder, "column-" + str(column))
    return kindcode, startpage


def _get_whitelist():
    whitelist = []
    try:
        for line in open('whitelist.txt', 'r').readlines():
            line = line.replace('\n', '')
            whitelist.append(line)
    except:
        whitelist = False
    return whitelist


# Load the search report data (loader called without arguments).
data = parser.search_report_data()

# Whatever _get_whitelist returns: the lines of whitelist.txt, or False when
# the file could not be read.
whitelist = _get_whitelist()


# Create perf.cited.txt, or empty it if it already exists.
open('perf.cited.txt', 'w').write('')
# NOTE(review): presumably filled with download targets further down; it is
# not populated in the visible part of the script -- confirm.
dl = []

for reportfile in data:
    for entry in reportfile:
        patnum = entry['patnum']
        country = entry['country']
        # Default identifier is "<country>.<patnum>".
        to_download = entry['country'] + '.' + patnum

        # For WO entries the identifier is replaced by the result of
        # detected.normalize_wo_patnum(patnum).
        if country == 'WO':
           to_download = detected.normalize_wo_patnum(patnum)
Example #6
0
from base import parser


# Report data loaded with 'testdata/' as the argument (presumably the
# directory to scan -- confirm against parser.search_report_data).
testdata = parser.search_report_data('testdata/')
# Same loader called without arguments.
# NOTE(review): `data` is not referenced again in the visible code.
data = parser.search_report_data()

def find(key, value, data):
    """
    Tell whether any record in ``data`` has ``record[key] == value``.

    key   -- key looked up in every record.
    value -- value the looked-up item is compared against.
    data  -- iterable of records supporting ``record[key]``.

    Returns True as soon as the first matching record is seen (records
    after it are not inspected), and False when nothing matches or
    ``data`` is empty.
    """
    return any(record[key] == value for record in data)


# For every entry of every test report, print whether its patnum, country
# and kindcode can be found again in the report the entry came from.
# The loop variables deliberately avoid the builtin names `set` and
# `property`, and the single-argument print(...) form gives the same output
# on Python 2 while also being valid on Python 3.
for report in testdata:
    for entry in report:
        patnum = entry['patnum']
        country = entry['country']
        kindcode = entry['kindcode']
        print(find('patnum', patnum, report))
        print(find('country', country, report))
        print(find('kindcode', kindcode, report))