def handle_patent(elem):
    """Build a generation-2 patent record from ``elem`` and store it.

    The record is a ``defaultdict(str)`` tagged with ``gen = 2``.  Fields are
    read from the ``SDOBI`` bibliographic section (``B100``, ``B200``,
    ``B500`` and the ``B700/B730/B731/PARTY-US`` party) and from the
    ``SDOAB/BTEXT/PARA`` abstract paragraphs.

    Args:
        elem: XML element exposing ``find``/``findall``.

    Returns:
        The value returned by ``store_patent`` for the assembled record.
    """
    record = defaultdict(str)
    record['gen'] = 2

    # bibliographic root
    biblio = elem.find('SDOBI')

    # grant number and grant date
    published = biblio.find('B100')
    record['patnum'] = get_text(published, 'B110/DNUM/PDAT')
    record['grantdate'] = get_text(published, 'B140/DATE/PDAT')

    # filing date
    filed = biblio.find('B200')
    record['filedate'] = get_text(filed, 'B220/DATE/PDAT')

    # ipc codes: every code is paired with the one version string of the section
    techdata = biblio.find('B500')
    ipc_node = techdata.find('B510')
    version = get_text(ipc_node, 'B516/PDAT')
    primary = get_text(ipc_node, 'B511/PDAT')
    codes = [] if primary is None else [(primary, version)]
    codes.extend(
        (get_text(extra, 'PDAT'), version)
        for extra in ipc_node.findall('B512')
    )
    record['ipclist'] = codes

    # cited document numbers (empty list when there is no B560 section)
    cited_section = techdata.find('B560')
    if cited_section is None:
        record['citlist'] = []
    else:
        record['citlist'] = [
            get_text(ref, 'PCIT/DOC/DNUM/PDAT')
            for ref in cited_section.findall('B561')
        ]

    # title and claims
    record['title'] = get_text(techdata, 'B540/STEXT/PDAT')
    record['claims'] = get_text(techdata, 'B570/B577/PDAT')

    # owner name (upper-cased) and, when an address is present, state/country
    party = biblio.find('B700/B730/B731/PARTY-US')
    if party is not None:
        owner_name = get_text(party, 'NAM/ONM/STEXT/PDAT')
        record['owner'] = owner_name.upper()
        addr = party.find('ADR')
        if addr is not None:
            record['state'] = get_text(addr, 'STATE/PDAT')
            # country falls back to 'US' when the element is absent
            record['country'] = get_text(addr, 'CTRY/PDAT', default='US')

    # abstract: paragraphs joined with newlines, only if any exist
    paragraphs = elem.findall('SDOAB/BTEXT/PARA')
    if paragraphs:
        record['abstract'] = '\n'.join(raw_text(par) for par in paragraphs)

    # roll it in
    return store_patent(record)
def parse_grants_gen3(elem):
    """Extract one generation-3 record from ``elem`` and store it.

    Starts from ``copy(default)`` and fills in publication number/date,
    application number/date, assignee organisation name, title, IPC codes,
    applicant address and abstract.

    NOTE(review): despite ``grants`` in the name, the bibliographic section
    looked up here is ``us-bibliographic-data-application`` -- confirm that
    this is intended.

    Args:
        elem: XML element exposing ``find``.

    Returns:
        The value returned by ``store_patent`` for the assembled record.
    """
    pat = copy(default)

    # top-level section
    bib = elem.find('us-bibliographic-data-application')
    pubref = bib.find('publication-reference')
    appref = bib.find('application-reference')

    # published patent
    pubinfo = pubref.find('document-id')
    pat['pubnum'] = get_text(pubinfo, 'doc-number')
    pat['pubdate'] = get_text(pubinfo, 'date')

    # filing date
    pat['appnum'] = get_text(appref, 'document-id/doc-number')
    pat['appdate'] = get_text(appref, 'document-id/date')
    pat['appname'] = get_text(bib, 'assignees/assignee/orgname')

    # title
    pat['title'] = get_text(bib, 'invention-title')

    # ipc code
    # Both section names are tried in turn; if both are present the second
    # one overwrites the values taken from the first.  A section that is
    # present but yields no codes is skipped instead of indexing an empty
    # list (which raised IndexError).
    ipcsec = bib.find('classifications-ipcr')
    if ipcsec is not None:
        ipclist = list(gen3_ipcr(ipcsec))
        if ipclist:
            pat['ipc1'], pat['ipcver'] = ipclist[0]
            pat['ipc2'] = ';'.join([i for i, _ in ipclist])

    ipcsec = bib.find('classification-ipc')
    if ipcsec is not None:
        ipclist = list(gen3_ipc(ipcsec))
        if ipclist:
            pat['ipc1'], pat['ipcver'] = ipclist[0]
            pat['ipc2'] = ';'.join([i for i, _ in ipclist])

    # applicant name and address
    address = bib.find('parties/applicants/applicant/addressbook/address')
    if address is not None:
        pat['city'] = get_text(address, 'city')
        pat['state'] = get_text(address, 'state')
        pat['country'] = get_text(address, 'country')

    # abstract
    abspar = elem.find('abstract')
    if abspar is not None:
        pat['abstract'] = raw_text(abspar, sep=' ')

    # roll it in
    return store_patent(pat)
def parse_grants_gen2(elem):
    """Extract one generation-2 record from ``elem`` and store it.

    Starts from ``copy(default)`` and fills in publication data, application
    data, assignee organisation name, title, IPC codes, correspondence
    address and abstract, all read from the
    ``subdoc-bibliographic-information`` section (the abstract comes from
    ``subdoc-abstract``).

    Args:
        elem: XML element exposing ``find``.

    Returns:
        The value returned by ``store_patent`` for the assembled record.
    """
    pat = copy(default)

    # top-level section
    bib = elem.find('subdoc-bibliographic-information')

    # publication data
    pub = bib.find('document-id')
    if pub is not None:
        pat['pubnum'] = get_text(pub, 'doc-number')
        pat['pubdate'] = get_text(pub, 'document-date')

    # application data
    app = bib.find('domestic-filing-data')
    if app is not None:
        pat['appnum'] = get_text(app, 'application-number/doc-number')
        pat['appdate'] = get_text(app, 'filing-date')
    pat['appname'] = get_text(bib, 'assignee/organization-name')

    # title
    tech = bib.find('technical-information')
    pat['title'] = get_text(tech, 'title-of-invention')

    # ipc code
    # The edition is read only once the section is known to exist; it used
    # to be read from ``ipcsec`` before the None check below.
    ipcsec = tech.find('classification-ipc')
    if ipcsec is not None:
        pat['ipcver'] = get_text(ipcsec, 'classification-ipc-edition')
        ipclist = list(gen2_ipc(ipcsec))
        if ipclist:
            pat['ipc1'] = ipclist[0]
            pat['ipc2'] = ';'.join(ipclist)

    # applicant info
    address = bib.find('correspondence-address/address')
    if address is not None:
        pat['city'] = get_text(address, 'city')
        pat['state'] = get_text(address, 'state')
        pat['country'] = get_text(address, 'country/country-code')

    # abstract
    abst = elem.find('subdoc-abstract')
    if abst is not None:
        pat['abstract'] = raw_text(abst, sep=' ')

    # roll it in
    return store_patent(pat)
def handle_patent(elem):
    """Build a generation-3 grant record from ``elem`` and store it.

    The record is a ``defaultdict(str)`` tagged with ``gen = 3``.  Fields are
    read from the ``us-bibliographic-data-grant`` section: patent number,
    grant and filing dates, title, IPC codes, national class, number of
    claims, cited patent numbers and assignee name/address.  The abstract is
    read from ``elem`` itself.

    Args:
        elem: XML element exposing ``find``.

    Returns:
        The value returned by ``store_patent`` for the assembled record.
    """
    pat = defaultdict(str)
    pat['gen'] = 3

    # top-level section
    bib = elem.find('us-bibliographic-data-grant')
    pubref = bib.find('publication-reference')
    appref = bib.find('application-reference')

    # published patent
    pubinfo = pubref.find('document-id')
    pat['patnum'] = get_text(pubinfo, 'doc-number')
    pat['grantdate'] = get_text(pubinfo, 'date')

    # filing date
    pat['filedate'] = get_text(appref, 'document-id/date')

    # title
    pat['title'] = get_text(bib, 'invention-title')

    # ipc code: a list of (code, version) pairs gathered from both layouts
    ipclist = []
    ipcsec = bib.find('classifications-ipcr')
    if ipcsec is not None:
        for ipc in ipcsec.findall('classification-ipcr'):
            # main-group is right-justified to three characters by '%3s'
            code = '%s%s%s%3s%s' % (
                get_text(ipc, 'section'),
                get_text(ipc, 'class'),
                get_text(ipc, 'subclass'),
                get_text(ipc, 'main-group'),
                get_text(ipc, 'subgroup'),
            )
            version = get_text(ipc, 'ipc-version-indicator/date')
            ipclist.append((code, version))

    ipcsec = bib.find('classification-ipc')
    if ipcsec is not None:
        ipcver = get_text(ipcsec, 'edition')
        ipc0 = ipcsec.find('main-classification')
        for ipc in chain([ipc0], ipcsec.findall('further-classification')):
            # a missing main-classification element or an element without
            # text cannot be reformatted, so skip it instead of crashing
            if ipc is None or ipc.text is None:
                continue
            itxt = ipc.text
            # zeros in characters 4-6 become spaces; slashes are dropped
            # from everything after character 7
            itxt = (itxt[:4]
                    + itxt[4:7].replace('0', ' ')
                    + itxt[7:].replace('/', ''))
            ipclist.append((itxt, ipcver))
    pat['ipclist'] = ipclist

    # us class
    oclsec = bib.find('classification-national')
    if oclsec is not None:
        pat['class'] = get_text(oclsec, 'main-classification')

    # claims
    pat['claims'] = get_text(bib, 'number-of-claims')

    # citations: the section and its children carry a 'us-' prefix when the
    # unprefixed 'references-cited' section is absent
    refs = bib.find('references-cited')
    prefix = ''
    if refs is None:
        refs = bib.find('us-references-cited')
        prefix = 'us-'

    cites = []
    if refs is not None:
        for cite in refs.findall(prefix + 'citation'):
            pcite = cite.find('patcit')
            if pcite is None:
                continue
            docid = pcite.find('document-id')
            pnum = get_text(docid, 'doc-number')
            kind = get_text(docid, 'kind')
            # keep only documents of kind 'A' or any kind starting with 'B'
            if kind == 'A' or kind.startswith('B'):
                cites.append(pnum)
    pat['citlist'] = cites

    # applicant name and address
    assignee = bib.find('assignees/assignee/addressbook')
    if assignee is not None:
        pat['owner'] = get_text(assignee, 'orgname').upper()
        address = assignee.find('address')
        # an addressbook may lack an address element; unset keys still read
        # back as '' because pat is a defaultdict(str)
        if address is not None:
            pat['city'] = get_text(address, 'city').upper()
            pat['state'] = get_text(address, 'state')
            pat['country'] = get_text(address, 'country')

    # abstract
    abspar = elem.find('abstract')
    if abspar is not None:
        pat['abstract'] = raw_text(abspar, sep=' ')

    # roll it in
    return store_patent(pat)
def handle_patent(elem):
    """Build a generation-3 grant record from ``elem`` and store it.

    The record is a ``defaultdict(str)`` tagged with ``gen = 3``.  Fields are
    read from the ``us-bibliographic-data-grant`` section: patent number,
    grant and filing dates, title, IPC codes, number of claims, cited patent
    numbers and assignee name/state/country.  The abstract is read from
    ``elem`` itself.

    Args:
        elem: XML element exposing ``find``.

    Returns:
        The value returned by ``store_patent`` for the assembled record.
    """
    pat = defaultdict(str)
    pat['gen'] = 3

    # top-level section
    bib = elem.find('us-bibliographic-data-grant')
    pubref = bib.find('publication-reference')
    appref = bib.find('application-reference')

    # published patent
    pubinfo = pubref.find('document-id')
    pat['patnum'] = get_text(pubinfo, 'doc-number')
    pat['grantdate'] = get_text(pubinfo, 'date')

    # filing date
    pat['filedate'] = get_text(appref, 'document-id/date')

    # title
    pat['title'] = get_text(bib, 'invention-title')

    # ipc code: a list of (code, version) pairs gathered from both layouts
    ipclist = []
    ipcsec = bib.find('classifications-ipcr')
    if ipcsec is not None:
        for ipc in ipcsec.findall('classification-ipcr'):
            # main-group is right-justified to three characters by '%3s'
            code = '%s%s%s%3s%s' % (
                get_text(ipc, 'section'),
                get_text(ipc, 'class'),
                get_text(ipc, 'subclass'),
                get_text(ipc, 'main-group'),
                get_text(ipc, 'subgroup'),
            )
            version = get_text(ipc, 'ipc-version-indicator/date')
            ipclist.append((code, version))

    ipcsec = bib.find('classification-ipc')
    if ipcsec is not None:
        ipcver = get_text(ipcsec, 'edition')
        ipc0 = ipcsec.find('main-classification')
        for ipc in chain([ipc0], ipcsec.findall('further-classification')):
            # a missing main-classification element or an element without
            # text cannot be reformatted, so skip it instead of crashing
            if ipc is None or ipc.text is None:
                continue
            itxt = ipc.text
            # zeros in characters 4-6 become spaces; slashes are dropped
            # from everything after character 7
            itxt = (itxt[:4]
                    + itxt[4:7].replace('0', ' ')
                    + itxt[7:].replace('/', ''))
            ipclist.append((itxt, ipcver))
    pat['ipclist'] = ipclist

    # claims
    pat['claims'] = get_text(bib, 'number-of-claims')

    # citations: the section and its children carry a 'us-' prefix when the
    # unprefixed 'references-cited' section is absent
    refs = bib.find('references-cited')
    prefix = ''
    if refs is None:
        refs = bib.find('us-references-cited')
        prefix = 'us-'

    cites = []
    if refs is not None:
        for cite in refs.findall(prefix + 'citation'):
            pcite = cite.find('patcit')
            if pcite is None:
                continue
            docid = pcite.find('document-id')
            pnum = get_text(docid, 'doc-number')
            kind = get_text(docid, 'kind')
            # keep only documents of kind 'A' or any kind starting with 'B'
            if kind == 'A' or kind.startswith('B'):
                cites.append(pnum)
    pat['citlist'] = cites

    # applicant name and address
    assignee = bib.find('assignees/assignee/addressbook')
    if assignee is not None:
        pat['owner'] = get_text(assignee, 'orgname').upper()
        address = assignee.find('address')
        # an addressbook may lack an address element; unset keys still read
        # back as '' because pat is a defaultdict(str)
        if address is not None:
            pat['state'] = get_text(address, 'state')
            pat['country'] = get_text(address, 'country')

    # abstract
    abspar = elem.find('abstract')
    if abspar is not None:
        pat['abstract'] = raw_text(abspar, sep=' ')

    # roll it in
    return store_patent(pat)