def handle_all():
    """Run ``handle_patent`` on every item yielded by ``pp.read_events()``.

    Each event is a pair whose first member is ignored and whose second
    member is passed to ``handle_patent``. After a successful call the
    item is handed to ``clear`` before moving on.

    Returns:
        False as soon as ``handle_patent`` returns a falsy value (that
        item is not passed to ``clear``); True once every event has been
        handled.
    """
    for _event, patent in pp.read_events():
        succeeded = handle_patent(patent)
        if not succeeded:
            # Stop at the first failure; the failing item is left untouched.
            return False
        # Done with this item -- release it before reading the next one.
        clear(patent)
    return True
def parse_gen3(fname_in):
    """Stream 'patent-assignment' records out of ``fname_in`` into ``chunker``.

    For every record that survives filtering, one row per patent number is
    inserted via ``chunker.insert``. A record is skipped when it carries no
    patent numbers, when either party is classified as ``ORG_INDV`` by
    ``org_type``, or when ``convey_type`` classifies the conveyance text as
    ``CONV_OTHER``.

    Every element is passed to ``clear`` once it is no longer needed,
    including skipped ones, so that filtered records do not stay in memory
    during the streaming parse.

    Module-level counters updated:
        i -- number of records written out
        o -- number of records discarded by the organisation/conveyance filter
        p -- number of patent rows written out

    Args:
        fname_in: input handed straight to ``iterparse``.

    Returns:
        False if ``args.limit`` is set and ``i`` has reached it (parsing
        stops early); True once the whole input has been consumed.
    """
    global i, o, p

    for (_event, elem) in iterparse(fname_in, tag='patent-assignment', events=['end'], recover=True):
        # top-level section (only the first assignor/assignee is used)
        record = elem.find('assignment-record')
        assignor = elem.find('patent-assignors')[0]
        assignee = elem.find('patent-assignees')[0]
        patents = elem.find('patent-properties')

        # conveyance
        convey = get_text(record, 'conveyance-text')

        # names
        assignor_name = get_text(assignor, 'name')
        assignee_name = get_text(assignee, 'name')

        # dates (either section may be absent -> empty string)
        exec_sec = assignor.find('execution-date')
        recd_sec = record.find('recorded-date')
        exec_date = get_text(exec_sec, 'date') if exec_sec is not None else ''
        recd_date = get_text(recd_sec, 'date') if recd_sec is not None else ''

        # location
        assignee_country = get_text(assignee, 'country-name', default='UNITED STATES')
        assignee_state = get_text(assignee, 'state')

        # patent info
        patnums = list(gen_patnums(patents))
        npat = len(patnums)
        if npat == 0:
            # nothing to write; still release the element
            clear(elem)
            continue

        # code names
        src_type = org_type(assignor_name)
        dst_type = org_type(assignee_name)
        ctype = convey_type(convey)

        # throw out individuals and unclassified conveyances
        if src_type == ORG_INDV or dst_type == ORG_INDV or ctype == CONV_OTHER:
            o += 1
            # filtered records must be freed too, not just the written ones
            clear(elem)
            continue

        # output
        for pn in patnums:
            chunker.insert(None, pn, exec_date, recd_date, convey, assignor_name, assignee_name, assignee_state, assignee_country)

        # free memory
        clear(elem)

        # stats
        i += 1
        p += npat

        # logging
        if i % 1000 == 0:
            print('%4d: %40.40s -> %30.30s (%20.20s, %20.20s)' % (npat, assignor_name, assignee_name, assignee_state, assignee_country))

        # break
        if args.limit and i >= args.limit:
            return False

    return True