import requests
from bs4 import BeautifulSoup

from cnsipo.utils import retry, JobQueue, threaded
from cnsipo.shared import (get_logger, ContentError, FORGIVEN_ERROR,
                           DETAIL_KINDS)

KINDS = ['fmgb', 'fmsq', 'syxx', 'wgsq']
# NOTE(review): STR_SRC and STR_WHERE look like position-parallel companions
# of KINDS (same length, same order) -- confirm against the callers.
STR_SRC = ['fmmost', 'fmmost', 'xxmost', 'wgmost']
STR_WHERE = ['GB', 'SQ', 'GB', 'SQ']

DELAY = 3
RETRIES = 1000

logger = get_logger()


def detail_params(patent_id, kind):
    """Return the patent-detail URL together with its query parameters.

    ``kind`` is used as an index into ``STR_SRC`` and ``STR_WHERE``;
    ``patent_id`` is interpolated into the ``strWhere`` filter expression.

    Returns a ``(url, params)`` tuple where ``params`` is a dict.
    """
    # Look up the source first, then the where-prefix, mirroring the order
    # in which an invalid ``kind`` would surface as an error.
    source = STR_SRC[kind]
    where_prefix = STR_WHERE[kind]
    where_clause = "申请号='{}' and {}INDEX=1".format(patent_id, where_prefix)

    query = {}
    query['strSources'] = source
    query['strWhere'] = where_clause
    query['strLicenseCode'] = ""
    query['pageNow'] = 1

    endpoint = "http://epub.sipo.gov.cn/patentdetail.action"
    return endpoint, query


def detail_parse(bs, kind):
    # TODO: not work for kind 'wgsq'
    details = {}
    tbl = bs.table.table
# -*- coding: utf-8 -*- """ Create a UIG(University/Industry/Government) database """ from __future__ import print_function import sys from optparse import OptionParser import psycopg2 from cnsipo.shared import get_logger from cnsipo.patent_parser import PatentParser logger = get_logger() APP_NO = 'app_no' APP_YEAR = 'app_year' STATE = 'state' ADDRESS = 'address' APPLICANT = 'applicant' COLLAB = 'collab' KIND = 'kind' ORG = 'org' ORG2 = 'org2' patent_parser = None def gen_uig_data(conn, table, aux_tbl, year, batch_size): stmt = "SELECT {}, {}, {} FROM {} WHERE {} IN "\