Beispiel #1
0

# Collect the input files. With no target, or with a single target that is
# a directory, every *.xml file in that directory is used ('assign_files'
# when no target was given); otherwise the targets are taken as file paths.
if len(args.target) != 0 and (len(args.target) != 1
                              or not os.path.isdir(args.target[0])):
    file_list = args.target
else:
    if len(args.target) == 0:
        targ_dir = 'assign_files'
    else:
        targ_dir = args.target[0]
    file_list = glob.glob('%s/*.xml' % targ_dir)
    file_list.sort()

# Parse each file in turn. A failure inside one file is reported together
# with its traceback, and the loop then carries on with the next file.
for fpath in file_list:
    fdir, fname = os.path.split(fpath)
    print('Parsing %s' % fname)

    # Snapshot the counters so this file's share can be reported below.
    # NOTE(review): i, o and p are presumably advanced by parse_gen3 as
    # module-level counters -- confirm against its definition.
    i0, o0, p0 = i, o, p
    try:
        parse_gen3(fpath)
    except Exception:
        print('EXCEPTION OCCURRED!')
        print_exc()

    found = (i - i0, o - o0, p - p0)
    total = (i, o, p)
    print('Found %d records, %d dropped, %d patents' % found)
    print('Total %d records, %d dropped, %d patents' % total)
    print()

# commit what the chunker still has, then close the connection
chunker.commit()
con.close()
Beispiel #2
0
# Parse every file with the parser that matches its format generation.
for fpath in file_list:
    fdir, fname = os.path.split(fpath)

    # The file name alone selects the generation. The '.dat' suffix is
    # tested first, so it takes precedence over either prefix.
    if fname.endswith('.dat'):
        gen, parser = 1, parse_grants_gen1
    elif fname.startswith('pgb'):
        gen, parser = 2, parse_grants_gen2
    elif fname.startswith('ipgb'):
        gen, parser = 3, parse_grants_gen3
    else:
        # an unrecognised name aborts the whole run
        raise Exception('Unknown format')

    print('Parsing %s, gen = %d' % (fname, gen))

    # Remember the count before parsing to report this file's share.
    # NOTE(review): i is presumably advanced by store_patent -- confirm.
    i0 = i
    try:
        parser(fpath, store_patent)
    except Exception:
        # report the failure with its traceback and keep going
        print('EXCEPTION OCCURRED!')
        print_exc()
    found = i - i0
    print('Found %d patents, %d total' % (found, i))
    print()

# Commit the three chunkers in turn, then close the connection.
for pending in (pat_chunker, ipc_chunker, cit_chunker):
    pending.commit()
con.close()
Beispiel #3
0
        print("Reached limit.")
        break

    (fdir, fname) = os.path.split(fpath)
    if fname.endswith('.dat'):
        gen = 1
        parser = parse_grants_gen1
    elif fname.startswith('pgb'):
        gen = 2
        parser = parse_grants_gen2
    elif fname.startswith('ipgb'):
        gen = 3
        parser = parse_grants_gen3
    else:
        raise (Exception('Unknown format'))

    print('Parsing %s, gen = %d' % (fname, gen))
    i0 = i
    try:
        parser(fpath, store_patent)
    except Exception as e:
        print('EXCEPTION OCCURRED!')
        print_exc()
    print('Found %d patents, %d total' % (i - i0, i))
    print()

# Commit the three chunkers in turn, then close the connection.
for pending in (pat_chunker, ipc_chunker, cit_chunker):
    pending.commit()
con.close()
Beispiel #4
0
    return True

# Work out which files to parse: no target means the default directory
# 'assign_files', a single directory target means that directory, and in
# both cases its *.xml files are taken in sorted order. Any other set of
# targets is used as the file list directly.
if len(args.target) == 0:
    targ_dir = 'assign_files'
    file_list = sorted(glob.glob('%s/*.xml' % targ_dir))
elif len(args.target) == 1 and os.path.isdir(args.target[0]):
    targ_dir = args.target[0]
    file_list = sorted(glob.glob('%s/*.xml' % targ_dir))
else:
    file_list = args.target

# Robust parsing: an exception in one file is printed with its traceback
# and does not stop the remaining files from being processed.
for fpath in file_list:
    fdir, fname = os.path.split(fpath)
    print('Parsing %s' % fname)

    # Counter values before this file, used for the per-file report.
    # NOTE(review): i, o and p are presumably updated inside parse_gen3
    # as module-level counters -- confirm against its definition.
    before = (i, o, p)
    try:
        parse_gen3(fpath)
    except Exception:
        print('EXCEPTION OCCURRED!')
        print_exc()

    after = (i, o, p)
    delta = tuple(now - then for now, then in zip(after, before))
    print('Found %d records, %d dropped, %d patents' % delta)
    print('Total %d records, %d dropped, %d patents' % after)
    print()

# commit what the chunker still has, then close the connection
chunker.commit()
con.close()