Esempio n. 1
0
                    type=str,
                    default=None,
                    help='database file to store to')
parser.add_argument(
    '--limit',
    type=int,
    help='only parse n patents',
)
args = parser.parse_args()

# Open the SQLite database at args.db and make sure the `assign` table and
# its uniqueness index exist before anything is inserted.
con = sqlite3.connect(args.db)
cur = con.cursor()
for _schema_stmt in (
    # one row per assignment record, keyed by an integer primary key
    'create table if not exists assign (assignid integer primary key, patnum int, execdate text, recdate text, conveyance text, assignor text, assignee text, assignee_state text, assignee_country text)',
    # rows are unique on (patnum, execdate, assignor, assignee)
    'create unique index if not exists idx_assign on assign (patnum,execdate,assignor,assignee)',
):
    cur.execute(_schema_stmt)

# ChunkInserter is defined elsewhere; it is given the open connection and
# the name of the table it targets.
chunker = ChunkInserter(con, table='assign')


def gen_patnums(patents):
    """Yield the doc-number of every kind-'B' document-id under each patent.

    Each element of `patents` is searched with ``findall('document-id')``.
    For every hit, the `kind` and `doc-number` values are read through
    `get_text` (defined elsewhere), and the number is yielded only when the
    kind string starts with ``'B'``.
    """
    for patent in patents:
        for doc_id in patent.findall('document-id'):
            kind_code = get_text(doc_id, 'kind')
            number = get_text(doc_id, 'doc-number')
            if kind_code.startswith('B'):
                yield number


# parseahol
# Module-level counters, both starting at zero.
# NOTE(review): the code that updates `i` and `o` is outside this excerpt;
# presumably they track parsing progress. Confirm against the parser.
i = 0
o = 0
Esempio n. 2
0
parser.add_argument(
    '--db', type=str, default=None, help='database file to store to'
)
args = parser.parse_args()

# Open the database file given by --db.
# NOTE(review): --db defaults to None and sqlite3.connect(None) raises
# TypeError, so the option is effectively mandatory. Confirm the intent.
con = sqlite3.connect(args.db)
cur = con.cursor()

# Rebuild `assign_use` from scratch on every run: any previous copy is
# dropped and the table is recreated with the column layout below.
for _schema_stmt in (
    'drop table if exists assign_use',
    'create table assign_use (assignid integer primary key, patnum int, execdate text, recdate text, conveyance text, assignor text, assignee text, assignee_state text, assignee_country text)',
):
    cur.execute(_schema_stmt)

# ChunkInserter is defined elsewhere; it is given the open connection and
# the name of the table it targets.
chunker = ChunkInserter(con, table='assign_use')

# Counters initialised to zero; they are not updated in the visible part of
# the loop below.
match_num = 0
rnum = 0
# Walk every row of `assign` and score how much the two party names overlap.
for row in cur.execute('select * from assign'):
    # NOTE(review): the `assign_use` table created above lists its columns as
    # (assignid, patnum, execdate, recdate, conveyance, assignor, assignee, ...).
    # If `assign` shares that layout, row[5] is the assignor and row[6] the
    # assignee, so these two names look swapped. Confirm against the schema.
    (assignee, assignor) = (row[5], row[6])

    # name_standardize_strong is defined elsewhere; its result is used below
    # as a sized collection of tokens that supports `in`.
    assignor_toks = name_standardize_strong(assignor)
    assignee_toks = name_standardize_strong(assignee)

    # Count the tokens of `assignor_toks` that also occur in `assignee_toks`.
    word_match = 0
    for tok in assignor_toks:
        if tok in assignee_toks:
            word_match += 1

    # Normalise by the mean token count of the two names; the max(1.0, ...)
    # floor keeps the divisor non-zero when both token collections are empty.
    word_match /= max(1.0, 0.5 * (len(assignor_toks) + len(assignee_toks)))
Esempio n. 3
0
# database setup: open the database at args.db and create, if missing, the
# tables and indexes that the inserters below target
con = sqlite3.connect(args.db)
cur = con.cursor()
for _schema_stmt in (
    # `patent` table, unique on patnum
    'create table if not exists patent (patnum int, filedate text, grantdate text, class text, ipc text, ipcver text, city text, state text, country text, owner text, claims int, title text, abstract text, gen int)',
    'create unique index if not exists idx_patnum on patent (patnum)',
    # `ipc` table, unique per (patnum, code) and indexed on each column
    'create table if not exists ipc (patnum int, code text, version text)',
    'create unique index if not exists ipc_pair on ipc (patnum,code)',
    'create index if not exists ipc_patnum on ipc (patnum)',
    'create index if not exists ipc_code on ipc (code)',
    # `cite` table of (src, dst) pairs, unique per pair
    'create table if not exists cite (src int, dst int)',
    'create unique index if not exists cite_pair on cite (src,dst)',
):
    cur.execute(_schema_stmt)

# one ChunkInserter (defined elsewhere) per target table, all sharing `con`
pat_chunker = ChunkInserter(con, table='patent')
ipc_chunker = ChunkInserter(con, table='ipc')
cit_chunker = ChunkInserter(con, table='cite')

# fields
fields = [
    'patnum',  # Patent number
    'filedate',  # Application date
    'grantdate',  # Publication date
    'class',  # US patent classification
    'ipc',  # IPC codes
    'ipcver',  # IPC version info
    'city',  # Assignee city
    'state',  # State code
    'country',  # Assignee country
    'owner',  # Assignee name
Esempio n. 4
0
parser.add_argument(
    '--db',
    type=str,
    default=None,
    help='database file to store to',
)
parser.add_argument(
    '--limit',
    type=int,
    help='only parse n patents',
)
args = parser.parse_args()

# database setup: open the file given by --db and create, if missing, the
# tables and indexes that the inserters below target
# NOTE(review): --db defaults to None and sqlite3.connect(None) raises
# TypeError, so the option is effectively mandatory. Confirm the intent.
con = sqlite3.connect(args.db)
cur = con.cursor()
for _schema_stmt in (
    # `patent` table, unique on patnum
    'create table if not exists patent (patnum int, filedate text, grantdate text, ipc text, ipcver text, state text, country text, owner text, claims int, title text, abstract text, gen int)',
    'create unique index if not exists idx_patnum on patent (patnum)',
    # `ipc` table, unique per (patnum, code) and indexed on each column
    'create table if not exists ipc (patnum int, code text, version text)',
    'create unique index if not exists ipc_pair on ipc (patnum,code)',
    'create index if not exists ipc_patnum on ipc (patnum)',
    'create index if not exists ipc_code on ipc (code)',
    # `cite` table of (src, dst) pairs, unique per pair
    'create table if not exists cite (src int, dst int)',
    'create unique index if not exists cite_pair on cite (src,dst)',
):
    cur.execute(_schema_stmt)

# one ChunkInserter (defined elsewhere) per target table, all sharing `con`
pat_chunker = ChunkInserter(con, table='patent')
ipc_chunker = ChunkInserter(con, table='ipc')
cit_chunker = ChunkInserter(con, table='cite')

# fields
fields = [
    'patnum', # Patent number
    'filedate', # Application date
    'grantdate', # Publication date
    'ipc', # IPC codes
    'ipcver', # IPC version info
    'state', # Province code
    'country', # Application Country
    'owner', # Applicant name
    'claims', # Independent claim
    'title', # Title
Esempio n. 5
0
# MAIN SECTION

# Command-line interface: zero or more input paths, the database file, and
# an optional cap on how many patents to parse.
parser = argparse.ArgumentParser(description='USPTO patent parser.')
parser.add_argument(
    'target',
    type=str,
    nargs='*',
    help='path or directory of file(s) to parse',
)
parser.add_argument(
    '--db',
    type=str,
    default=None,
    help='database file to store to',
)
parser.add_argument(
    '--limit',
    type=int,
    help='only parse n patents',
)
args = parser.parse_args()

# Open the database given by --db and make sure the `assign` table and its
# uniqueness index exist.
# NOTE(review): --db defaults to None and sqlite3.connect(None) raises
# TypeError, so the option is effectively mandatory. Confirm the intent.
con = sqlite3.connect(args.db)
cur = con.cursor()
for _schema_stmt in (
    # one row per assignment record, keyed by an integer primary key
    'create table if not exists assign (assignid integer primary key, patnum int, execdate text, recdate text, conveyance text, assignor text, assignee text, assignee_state text, assignee_country text)',
    # rows are unique on (patnum, execdate)
    'create unique index if not exists idx_assign on assign (patnum,execdate)',
):
    cur.execute(_schema_stmt)

# ChunkInserter is defined elsewhere; it is given the open connection and
# the name of the table it targets.
chunker = ChunkInserter(con, table='assign')

def gen_patnums(patents):
    """Generate doc-numbers for the kind-'B' document-ids of each patent.

    Every element of `patents` is queried with ``findall('document-id')``.
    The `kind` and `doc-number` values of each result are fetched via
    `get_text` (defined elsewhere); a number is yielded only if its kind
    string begins with ``'B'``.
    """
    for entry in patents:
        for doc_id in entry.findall('document-id'):
            kind_code = get_text(doc_id, 'kind')
            doc_number = get_text(doc_id, 'doc-number')
            if kind_code.startswith('B'):
                yield doc_number

# parseahol
# Module-level counters, all starting at zero.
# NOTE(review): the code that updates `i`, `o` and `p` is outside this
# excerpt; presumably they track parsing progress. Confirm against the parser.
i = 0
o = 0
p = 0
def parse_gen3(fname_in):
Esempio n. 6
0
            pp.feed('</root>\n')
            return parse_all()

# parse input arguments: zero or more input paths, the database file, and
# the reporting interval
parser = argparse.ArgumentParser(description='patent application parser')
parser.add_argument('target', type=str, nargs='*', help='path of file to parse')
parser.add_argument('--db', type=str, default=None, help='database file to store to')
# --output is a count ("how often to output summary") with an integer
# default, so it is parsed as int. With type=str a value given on the command
# line arrived as a string while the default stayed an int, leaving
# args.output with two possible types.
parser.add_argument('--output', type=int, default=100000, help='how often to output summary')
args = parser.parse_args()

# database setup: open the file given by --db and make sure the `apply`
# table and its unique index on appnum exist
# NOTE(review): --db defaults to None and sqlite3.connect(None) raises
# TypeError, so the option is effectively mandatory. Confirm the intent.
con = sqlite3.connect(args.db)
cur = con.cursor()
for _schema_stmt in (
    # column list comes from `sig`, which is defined elsewhere in the file
    'create table if not exists apply (%s)' % sig,
    'create unique index if not exists idx_appnum on apply (appnum)',
):
    cur.execute(_schema_stmt)

# ChunkInserter is defined elsewhere; it is given the open connection and
# the name of the table it targets.
chunker = ChunkInserter(con, table='apply')

# fields
fields = [
    'appnum', # Patent number
    'filedate', # Application date
    'grantdate', # Publication date
    'class', # US patent classification
    'ipc', # IPC codes
    'ipcver', # IPC version info
    'city', # Assignee city
    'state', # State code
    'country', # Assignee country
    'owner', # Assignee name
    'claims', # Independent claim
    'title', # Title