def do_biblio_search(cql_query):
    """Run an OPS published-data search and collect matching document ids.

    Parameters
    ----------
    cql_query : str
        CQL expression passed to the OPS published-data-search service.

    Returns
    -------
    set of str
        Document identifiers assembled as country + doc-number + kind
        (e.g. 'EP1234567A1'); empty set when the query matches nothing.
    """
    OPS_NS = '{http://ops.epo.org}'
    EXCHANGE_NS = '{http://www.epo.org/exchange}'
    client = epo_ops.Client(key=OPS_APIKEY, secret=OPS_SECRET)
    try:
        # getting search result counts; OPS answers with an HTTP error
        # status when the query yields no hits, hence the HTTPError path.
        search_response = client.published_data_search(cql=cql_query)
    except HTTPError:
        print('no patents found for cql query %s' % cql_query)
        # BUG FIX: the original returned a list here but a set on success;
        # return an empty set so callers always receive the same type.
        return set()
    search_response_xml = str(search_response.content, encoding='utf-8')
    tree = et.fromstring(search_response_xml)
    biblio_search = tree.find(OPS_NS + 'biblio-search')
    found_patents = int(biblio_search.attrib['total-result-count'])
    print('%d patents found for %s query' % (found_patents, cql_query))
    refs = biblio_search.findall(
        './' + OPS_NS + 'search-result/' + OPS_NS + 'publication-reference/'
    )
    doc_ids = set()
    for pat_id in refs:
        # Each publication-reference carries country / doc-number / kind
        # children in the exchange namespace.
        country = pat_id.find('./' + EXCHANGE_NS + 'country').text
        doc_number = pat_id.find('./' + EXCHANGE_NS + 'doc-number').text
        kind = pat_id.find('./' + EXCHANGE_NS + 'kind').text
        doc_ids.add(country + doc_number + kind)
    return doc_ids
def get_epo_patents_for_search_term(search_term):
    """Page through OPS search results for *search_term* (up to 500 hits).

    Fetches ids/countries/types in pages of 100 via
    get_epo_patent_ids_for_search_term, then builds one patent record per
    complete hit with create_epo_patent.

    Returns
    -------
    dict
        Mapping of patent id -> patent object created by create_epo_patent.
    """
    client = epo_ops.Client(key='XXXXX', secret='XXXXX')  # Instantiate client
    search_term_epo_patent_ids = []
    search_term_epo_patent_country_origin = []
    search_term_epo_patent_type = []
    search_results = {}
    range_begin = 1
    range_max = 500
    while range_begin <= range_max:
        epo_patent_ids, epo_patent_countries, epo_patent_types = \
            get_epo_patent_ids_for_search_term(client, search_term,
                                               range_begin, range_begin + 99)
        # BUG FIX: the original `continue`d here without advancing
        # range_begin, spinning forever on an empty page. An empty page
        # means there are no more results, so stop paging instead.
        if not epo_patent_ids or not epo_patent_countries or not epo_patent_types:
            break
        search_term_epo_patent_ids.extend(epo_patent_ids)
        search_term_epo_patent_country_origin.extend(epo_patent_countries)
        search_term_epo_patent_type.extend(epo_patent_types)
        range_begin += 100
    # Create search text as dict. Applicants (companies typically),
    # inventor names, title, abstract.
    for pat_id, country, pat_type in zip(search_term_epo_patent_ids,
                                         search_term_epo_patent_country_origin,
                                         search_term_epo_patent_type):
        # Skip hits with any missing field; only complete records are kept.
        if pat_id is None or country is None or pat_type is None:
            continue
        search_results[pat_id] = create_epo_patent(client, pat_id, country,
                                                   pat_type)
    return search_results
def checkRequest(req):
    """Return the number of patents OPS finds for *req*, or 0 on failure.

    Relies on module-level `key` / `secret` OAuth credentials and the
    PatentSearch helper; any search/parse error yields a best-effort 0.
    """
    ops_client = epo_ops.Client(key, secret)
    ops_client.accept_type = 'application/json'
    try:
        lstBrevets2, nbTrouves = PatentSearch(ops_client, req)
        return nbTrouves
    except Exception:
        # BUG FIX: the original bare `except:` also swallowed SystemExit and
        # KeyboardInterrupt; catch Exception so those propagate while the
        # best-effort "0 results" behaviour is kept for API/parse errors.
        return 0
def download_patent(country, doc_number, kind):
    """Fetch the OPS bibliographic record for one publication.

    The 'biblio' endpoint includes authors, title, abstract, document id,
    year and applicant name; see
    https://worldwide.espacenet.com/help?locale=en_EP&method=handleHelpTopic&topic=bibliographic
    """
    ops = epo_ops.Client(key=OPS_APIKEY, secret=OPS_SECRET)
    # Identify the document in docdb format (number, country, kind).
    docdb_ref = epo_ops.models.Docdb(doc_number, country, kind)
    response = ops.published_data(
        reference_type='publication',  # alternatives: application, priority
        input=docdb_ref,               # original, docdb, epodoc
        endpoint='biblio',
        constituents=[],               # optional, no extra constituents
    )
    return response
def initiate_api_call():
    """Build an authenticated OPS client from environment credentials.

    Reads EPO_KEY / EPO_SECRET_KEY from the environment and returns an
    epo_ops.Client configured for JSON responses.
    """
    # get api access keys
    my_key = os.getenv("EPO_KEY")
    my_secret_key = os.getenv("EPO_SECRET_KEY")
    # instantiate client
    client = epo_ops.Client(key=my_key, secret=my_secret_key,
                            accept_type='json')
    # BUG FIX: removed leftover debug output `print(dir(client))`.
    return client
def ops_client():
    """Create an OPS client from the OPS_KEY / OPS_SECRET env variables.

    Raises
    ------
    RuntimeError
        When either variable is missing or empty.
    """
    credentials = {}
    for var_name in ("OPS_KEY", "OPS_SECRET"):
        value = os.getenv(var_name)
        if not value:
            raise RuntimeError(
                "'%s' environment variable does not exist or is empty."
                % var_name)
        credentials[var_name] = value
    return epo_ops.Client(
        key=credentials["OPS_KEY"],
        secret=credentials["OPS_SECRET"],
        middlewares=middlewares(),
    )
def get(self, identifier, credentials=None):
    """Return the OPS client cached under *identifier*, creating it lazily.

    *credentials* must carry 'consumer_key' / 'consumer_secret' the first
    time an identifier is seen; on later calls it is ignored and the
    cached client is reused.
    """
    cached = self.clients.get(identifier)
    if cached is not None:
        return cached
    # TODO: Enable throttling and caching.
    new_client = epo_ops.Client(
        key=credentials['consumer_key'],
        secret=credentials['consumer_secret'],
        accept_type='json',
        middlewares=[],
    )
    # Attach metrics manager object to ops client instance.
    registry = get_current_registry()
    new_client.metrics_manager = registry.getUtility(IUserMetricsManager)
    self.clients[identifier] = new_client
    return self.clients.get(identifier)
def __init__(self, api_key, api_secret):
    """Validate the OAuth credentials and build the underlying OPS client.

    Raises
    ------
    ValueError
        When either credential is empty/falsy.
    """
    self.api_key = api_key
    self.api_secret = api_secret
    # Sanity checks: both OAuth credentials are mandatory.
    if not (self.api_key and self.api_secret):
        message = 'OPSClient needs OAuth credentials for accessing the OPS API'
        logger.error(message)
        raise ValueError(message)
    # Create OPS client instance with response caching and rate throttling.
    self.client = epo_ops.Client(
        self.api_key,
        self.api_secret,
        accept_type='json',
        middlewares=[
            epo_ops.middlewares.Dogpile(),
            epo_ops.middlewares.Throttler(),
        ],
    )
# Script setup for merging patent lists: load the project config, read the
# OPS OAuth credentials from disk and open an authenticated client before
# processing the directories given on the command line.
from Patent2Net.P2N_Config import LoadConfig
from Patent2Net.app.data.fusion_list import FusionList

# Use the bundled CA file for all HTTPS requests made by `requests`.
os.environ['REQUESTS_CA_BUNDLE'] = 'cacert.pem'

# `global` at module level is a no-op; kept as a statement of intent that
# `key`/`secret` are shared module state.
global key
global secret

configFile = LoadConfig()
final_ndf = configFile.ndf

# OPS credentials are stored as a single "key,secret" line.
fic = open('./cles-epo.txt', 'r')
key, secret = fic.read().split(',')
key, secret = key.strip(), secret.strip()
fic.close()

ops_client = epo_ops.Client(key, secret)
# data = ops_client.family('publication', , 'biblio')
ops_client.accept_type = 'application/json'

print("Usage: FusionPatList dir1 dir2 [...] dirN dirResult")

### everything below is wrong since the file-storage model changed
#ListBiblioPath = ['..//DATA//'+ndf1+'//PatentBiblios', '..//DATA//'+ndf2+'//PatentBiblios']
#ResultListPath = ['..//DATA//'+ndf1+'//PatentLists', '..//DATA//'+ndf2+'//PatentLists']#List
#ListContentPath = ['..//DATA//'+ndf1+'//PatentContents', '..//DATA//'+ndf2+'//PatentContents']

data = dict()

import copy


def BrevetFusion(Brevet1, Brevet2):
    # Concatenate two patent records (list-like): copy Brevet1, then append
    # Brevet2's items. The local name deliberately shadows the function.
    # NOTE(review): no `return` is visible in this chunk — presumably the
    # function continues past this view; confirm against the full file.
    BrevetFusion = copy.copy(Brevet1)
    BrevetFusion.extend(Brevet2)
if isinstance(dico[clef], list) and len(dico[clef]) ==1: dico[clef] = dico[clef][0] elif isinstance(dico[clef], list) and len(dico[clef]) == 0: dico[clef] = '' elif isinstance(dico[clef], list) and len(dico[clef]) >1: if '' in dico[clef]: for nb in range(dico[clef].count('')): dico[clef].remove('') else: pass return dico if IsEnableScript: GatherContent = True #not fun registered_client = epo_ops.Client(key, secret) # data = registered_client.family('publication', , 'biblio') registered_client.accept_type = 'application/json' for ndf in [fic2 for fic2 in os.listdir(ResultBiblioPath) if fic2.count('Description')==0]: if ndf.startswith('Families'): typeSrc = 'Families' else: typeSrc = '' if 'Description'+ndf or 'Description'+ndf.lower() in os.listdir(ResultListPath): # NEW 12/12/15 new gatherer append data to pickle file in order to consume less memory ficBrevet = LoadBiblioFile(ResultListPath, ndf) else: #Retrocompatibility print 'gather your data again. sorry' sys.exit()
#user selects set of institutions to process country = input("Country to process: ") sheet = input("Sheet to process: ") #read in set of institutions inputFile = str(country) + '.xlsx' institutions = pd.ExcelFile(inputFile) institSheet = institutions.parse(sheet) filingNames = list(institSheet['PatentFilingName'].dropna().values) #Df for storing count values countDf = pd.DataFrame(index=filingNames, columns=['CountPatents']) #global variables for managing API client = epo_ops.Client(key='get your own key', secret='also yours') #instantiate client dataUse = 0 #total of data (in bytes) downloaded from OPS print('\nQueries running through search:') for instit in filingNames: #Going through institutions one by one query = generateQuery(instit) #Generate search query count, size = getCount(query) #Pull number of patents on EPO database for institutions countDf.loc[instit] = count #Store value dataUse = dataUse + size #Add size of returned object to total volume of data closed #Display aggregate size of data called print("Data volume called from OPS in this run: " + str(dataUse) + " bytes") #Export df exportName = country + "-" + sheet + "-patentCount.csv" countDf.to_csv(exportName, encoding='utf-8-sig')
response = client.published_data( # Retrieve bibliography data reference_type='publication', # publication, application, priority input=epo_ops.models.Docdb(pat_num, country, kind), # original, docdb, epodoc endpoint= 'biblio', # optional, defaults to biblio in case of published_data #optional, list of constituents ) tree = XMLparser(response) #parse the xml bib_data = get_bibdata(tree, NS) #get all biblo (title, dates) return bib_data if __name__ == "__main__": client = epo_ops.Client(key='62kB2O6tJtmG2RQsoOMJZUOhmbAlAkJ5', secret='WpsdCAOg9GyWw8i1') # Instantiate client for patent in patent_list: data = published_data_api(client, patent) family = family_data_api(client, patent) print data, family #Playground below: ''' documents = tree.findall("./epo:exchange-documents/epo:exchange-document", NS) for document in documents: i=1 while(1): bib_data = document.find("./epo:bibliographic-data", NS) pub_date=bib_data.find("./epo:publication-reference/epo:document-id[@document-id-type='epodoc']/epo:date", NS).text prior_date=bib_data.find('./epo:priority-claims/epo:priority-claim[@sequence="'+str(i)+'"]/epo:document-id[@document-id-type="epodoc"]/epo:date', NS) if prior_date!= None:
def OPSChercheAbstractBrevet(pat, DirStockage):
    """Fetch the abstract of patent *pat* from OPS and hand it to MakeIram4.

    *pat* is a dict-like record with at least 'label', 'country' and 'kind'
    (each value possibly a list). *DirStockage* is not used directly here —
    presumably consumed downstream by MakeIram4; TODO confirm.

    Returns the dict produced by MakeIram4, or an empty dict when no
    abstract could be retrieved.
    """
    import epo_ops
    from epo_ops.models import Docdb
    from epo_ops.models import Epodoc

    # OPS credentials are stored one level up as a single "key,secret" line.
    fic = open('../cles-epo.txt', 'r')
    key, secret = fic.read().split(',')
    key, secret = key.strip(), secret.strip()
    fic.close()
    ops_client = epo_ops.Client(key, secret)
    ops_client.accept_type = 'application/json'

    # Original field-path note kept for reference:
    # [u'document-id'][u'country']['$']+brevet[u'document-id'][u'doc-number']['$']brevet['publication-ref'][u'document-id'][0][u'kind']['$'])
    ndb = pat['label']
    Abstracts = dict()
    if isinstance(ndb, list):
        ndb = ndb[0]
    #print("Retrieving ", ndb)
    pays = pat['country']
    # NOTE(review): this loop rebinds `key`, clobbering the OAuth key read
    # above (already consumed by epo_ops.Client, so harmless but confusing).
    for key in ['label', 'country', 'kind']:
        if isinstance(pat[key], list):
            # hum some problem (again) in cleaning data within the family gatherer... 22/12/15
            pat[key] = list(set(pat[key]))
    if isinstance(pays, list):
        pays = pays[0]
    content = 'Abstract'
    endP = 'biblio'
    # First attempt: Epodoc form — country code + number minus its 2-char prefix.
    temp = ('publication', Epodoc(pays + ndb[2:]))  #, brevet[u'document-id'][u'kind']['$']))
    try:
        data = ops_client.published_data(*temp, endpoint=endP)  #ops_client.published_data()
        if data.ok and 'abstract' in str(data.json()):
            CheckDocDB = False
        else:
            CheckDocDB = True
    except Exception as err:
        # NOTE(review): if published_data raises, `data` stays unbound and
        # the scalar-kind fallback below reads it — potential NameError.
        CheckDocDB = True
    if CheckDocDB:
        # Epodoc lookup yielded no abstract — retry with the Docdb form.
        if isinstance(pat['kind'], list):
            tempoData = []
            for cc in pat['kind']:
                temp = ('publication', Docdb(ndb[2:], pays, cc))  # hope all comes from same country
                try:
                    tempoData.append(
                        ops_client.published_data(*temp, endpoint=endP))
                except:
                    data = None
                    pass
            for dat in tempoData:
                if dat is not None and dat.ok:
                    contenu = content
                    patentCont = dat.json()
                    Abstracts = MakeIram4(pat, patentCont, contenu)
                    # Make2Iram2 should format the patent into a txt file in
                    # Iramuteq format in the right directory.
                    # Lang is a thing :-) (I believe it returns the language
                    # of the retrieved abstract)
        else:
            temp = ('publication', Docdb(pat['label'][2:], pat['country'], pat['kind']))
            # NOTE(review): `temp` is rebuilt here but published_data() is
            # never called with it — looks like a missing request; `data`
            # still holds the Epodoc response (or is unbound). TODO confirm.
            if data is not None and data.ok:
                contenu = content
                patentCont = data.json()
                Abstracts = MakeIram4(pat, patentCont, contenu)
    # if ops:world-patent-data exchange-documents exchange-documents abstract
    return Abstracts
response = client.published_data( # Retrieve bibliography data reference_type='publication', # publication, application, priority input=epo_ops.models.Docdb('101430697', 'CN', 'B'), # original, docdb, epodoc endpoint= 'biblio', # optional, defaults to biblio in case of published_data #optional, list of constituents ) return response #main if __name__ == "__main__": pat = sys.argv # to be used in case of working with agrguments client = epo_ops.Client(key='GTfPiUhprNpUavoL2B1WBT7MK0y1A3jw', secret='b1D4WcgkXNXQ5VTq') # Instantiate client #get data from epo response = published_data(client) xml = beautify(response) #cleaning XML savefile(xml) #saving XML doc for parsing tree = ET.parse('data.xml') root = tree.getroot() pat_num = tree.find( './/publication-reference/document-id[@document-id-type="epodoc"]/doc-number' ).text #obselete stuff publication_date, priority_list = get_dates()