Exemple #1
0
def do_biblio_search(cql_query):
    """Run a CQL bibliographic search against OPS and collect document ids.

    Args:
        cql_query: CQL query string handed to ``published_data_search``.

    Returns:
        A set of strings, each one the concatenation of the country,
        doc-number and kind texts of one search hit. When the search request
        raises ``HTTPError`` an empty *list* is returned (kept as-is for
        backward compatibility); when the response has no ``biblio-search``
        element an empty set is returned.
    """
    client = epo_ops.Client(key=OPS_APIKEY, secret=OPS_SECRET)
    try:
        # Only the network call is guarded; parsing errors should surface.
        search_response = client.published_data_search(cql=cql_query)
    except HTTPError:
        print('no patents found for cql query %s' % cql_query)
        return []

    search_response_xml = str(search_response.content, encoding='utf-8')
    tree = et.fromstring(search_response_xml)
    biblio_search = tree.find('{http://ops.epo.org}biblio-search')
    if biblio_search is None:
        # Previously this case crashed with AttributeError on `.attrib`.
        print('no patents found for cql query %s' % cql_query)
        return set()

    found_patents = int(biblio_search.attrib['total-result-count'])
    print('%d patents found for %s query' % (found_patents, cql_query))

    # NOTE(review): the trailing '/' appears to rely on ElementTree treating
    # it as an implicit '/*' (children of each publication-reference) --
    # confirm before changing the path.
    refs = biblio_search.findall(
        './{http://ops.epo.org}search-result/{http://ops.epo.org}publication-reference/'
    )

    doc_ids = set()
    for pat_id in refs:
        country = pat_id.findtext('./{http://www.epo.org/exchange}country')
        doc_number = pat_id.findtext(
            './{http://www.epo.org/exchange}doc-number')
        kind = pat_id.findtext('./{http://www.epo.org/exchange}kind')
        # Skip entries with a missing or empty component instead of failing
        # on `None + str`; a partial id would not identify a document.
        if not (country and doc_number and kind):
            continue
        doc_ids.add(country + doc_number + kind)

    return doc_ids
Exemple #2
0
def get_epo_patents_for_search_term(search_term):
    """Collect the EPO patents matching *search_term*, keyed by patent id.

    Results are requested in windows of 100 positions (1-100, 101-200, ...)
    up to position 500. Every hit with a non-None id, country and type is
    turned into a patent object via ``create_epo_patent``.

    Args:
        search_term: search expression forwarded to
            ``get_epo_patent_ids_for_search_term``.

    Returns:
        dict mapping each patent id to the value returned by
        ``create_epo_patent`` for that id.
    """
    client = epo_ops.Client(key='XXXXX', secret='XXXXX')  # Instantiate client

    page_size = 100
    range_max = 500

    # (patent_id, country, patent_type) triples gathered over all windows.
    hits = []
    for range_begin in range(1, range_max + 1, page_size):
        epo_patent_ids, epo_patent_countries, epo_patent_types = \
            get_epo_patent_ids_for_search_term(client, search_term,
                                               range_begin,
                                               range_begin + page_size - 1)
        # An empty window is skipped, but the range still advances. The
        # previous `continue` bypassed the increment, so one empty window
        # made the loop (and the API calls) repeat forever.
        if not epo_patent_ids or not epo_patent_countries or not epo_patent_types:
            continue
        hits.extend(zip(epo_patent_ids, epo_patent_countries, epo_patent_types))

    # Create search text as dict. Applicants (Companies typically), inventor
    # names, Title, abstract
    search_results = {}
    for patent_id, country, patent_type in hits:
        if patent_id is None or country is None or patent_type is None:
            continue
        search_results[patent_id] = create_epo_patent(client, patent_id,
                                                      country, patent_type)
    return search_results
Exemple #3
0
def checkRequest(req):
    """Return the count that ``PatentSearch`` reports for *req*.

    A JSON-accepting OPS client is built from the module-level ``key`` and
    ``secret`` and passed to ``PatentSearch``; the second element of its
    result (``nbTrouves``) is returned.

    Args:
        req: request forwarded unchanged to ``PatentSearch``.

    Returns:
        The count returned by ``PatentSearch``, or 0 when the search raises.
    """
    ops_client = epo_ops.Client(key, secret)
    ops_client.accept_type = 'application/json'
    try:
        _, nbTrouves = PatentSearch(ops_client, req)
    except Exception:
        # Best-effort check: any search failure counts as "nothing found".
        # `Exception` (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate.
        return 0
    return nbTrouves
Exemple #4
0
def download_patent(country, doc_number, kind):
    """Request the 'biblio' published-data record of one publication.

    Args:
        country: country code of the publication.
        doc_number: document number of the publication.
        kind: kind code of the publication.

    Returns:
        The value returned by ``Client.published_data`` for the DOCDB
        reference built from the three arguments.
    """
    ops = epo_ops.Client(key=OPS_APIKEY, secret=OPS_SECRET)

    # Docdb takes the number first, then country and kind.
    document = epo_ops.models.Docdb(doc_number, country, kind)

    # biblio includes authors, title, abstract, document id, year,
    # applicant-name:
    # https://worldwide.espacenet.com/help?locale=en_EP&method=handleHelpTopic&topic=bibliographic
    request = {
        'reference_type': 'publication',  # publication, application, priority
        'input': document,  # original, docdb, epodoc
        'endpoint': 'biblio',
        'constituents': [],  # optional, list of constituents
    }
    return ops.published_data(**request)
Exemple #5
0
def initiate_api_call():
    """Build an OPS client from the EPO_KEY / EPO_SECRET_KEY variables.

    The client is created with ``accept_type='json'``; its attribute list
    is printed before the client is returned.

    Returns:
        The newly created ``epo_ops.Client``.
    """
    # Credentials come from the environment; a missing variable yields None.
    credentials = {
        'key': os.getenv("EPO_KEY"),
        'secret': os.getenv("EPO_SECRET_KEY"),
    }

    client = epo_ops.Client(accept_type='json', **credentials)

    print(dir(client))
    return client
Exemple #6
0
def ops_client():
    """Create an OPS client from the OPS_KEY and OPS_SECRET variables.

    Returns:
        An ``epo_ops.Client`` built with both credentials and the list
        returned by ``middlewares()``.

    Raises:
        RuntimeError: if OPS_KEY, or else OPS_SECRET, is unset or empty.
    """
    consumer_key = os.environ.get("OPS_KEY")
    consumer_secret = os.environ.get("OPS_SECRET")

    # The key is validated before the secret, so a fully unconfigured
    # environment reports OPS_KEY first.
    if not consumer_key:
        raise RuntimeError(
            "'OPS_KEY' environment variable does not exist or is empty.")
    if not consumer_secret:
        raise RuntimeError(
            "'OPS_SECRET' environment variable does not exist or is empty.")

    client_options = dict(
        key=consumer_key,
        secret=consumer_secret,
        middlewares=middlewares(),
    )
    return epo_ops.Client(**client_options)
Exemple #7
0
    def get(self, identifier, credentials=None):
        """Return the OPS client stored under *identifier*, creating it once.

        Args:
            identifier: key into ``self.clients``.
            credentials: mapping with 'consumer_key' and 'consumer_secret';
                only read when no client is stored for *identifier* yet, in
                which case it must not be None.

        Returns:
            The client stored in ``self.clients`` for *identifier*.
        """
        # Fast path: a client was already built for this identifier.
        if identifier in self.clients:
            return self.clients.get(identifier)

        # TODO: Enable throttling and caching.
        client = epo_ops.Client(
            key=credentials['consumer_key'],
            secret=credentials['consumer_secret'],
            accept_type='json',
            middlewares=[],
        )

        # Attach metrics manager object to ops client instance.
        client.metrics_manager = get_current_registry().getUtility(
            IUserMetricsManager)

        self.clients[identifier] = client
        return self.clients.get(identifier)
Exemple #8
0
    def __init__(self, api_key, api_secret):
        """Store the OAuth credentials and build the underlying OPS client.

        Args:
            api_key: OPS consumer key.
            api_secret: OPS consumer secret.

        Raises:
            ValueError: if either credential is missing or empty; the
                message is logged as an error first.
        """
        self.api_key, self.api_secret = api_key, api_secret

        # Sanity check: both credentials are required.
        if not (self.api_key and self.api_secret):
            message = 'OPSClient needs OAuth credentials for accessing the OPS API'
            logger.error(message)
            raise ValueError(message)

        # JSON client with the Dogpile and Throttler middlewares, in that
        # order.
        self.client = epo_ops.Client(
            self.api_key,
            self.api_secret,
            accept_type='json',
            middlewares=[
                epo_ops.middlewares.Dogpile(),
                epo_ops.middlewares.Throttler(),
            ],
        )
Exemple #9
0
from Patent2Net.P2N_Config import LoadConfig
from Patent2Net.app.data.fusion_list import FusionList

# Sets REQUESTS_CA_BUNDLE to the relative path 'cacert.pem'.
os.environ['REQUESTS_CA_BUNDLE'] = 'cacert.pem'


configFile = LoadConfig()
final_ndf = configFile.ndf

# The credentials file must contain exactly "key,secret" (one comma).
# The context manager closes the file even when reading or unpacking fails;
# the former open()/close() pair leaked the handle in that case.
with open('./cles-epo.txt', 'r') as fic:
    key, secret = fic.read().split(',')
key, secret = key.strip(), secret.strip()

# Module-level OPS client that asks for JSON responses.
ops_client = epo_ops.Client(key, secret)
ops_client.accept_type = 'application/json'

print("Usage: FusionPatList dir1 dir2 [...] dirN dirResult")

# NOTE: everything below is wrong since the file storage model was changed.
#ListBiblioPath = ['..//DATA//'+ndf1+'//PatentBiblios', '..//DATA//'+ndf2+'//PatentBiblios']
#ResultListPath = ['..//DATA//'+ndf1+'//PatentLists', '..//DATA//'+ndf2+'//PatentLists']#List
#ListContentPath = ['..//DATA//'+ndf1+'//PatentContents', '..//DATA//'+ndf2+'//PatentContents']

data = dict()
import copy
# NOTE(review): this definition looks truncated in this excerpt -- the lines
# that introduce `dico` and `clef` (and whatever statement the indented
# branches below belong to) are not visible, so it does not parse as shown.
def BrevetFusion(Brevet1, Brevet2):
    # Work on a shallow copy so that the extend below applies to the copy
    # rather than to Brevet1 itself.
    BrevetFusion = copy.copy(Brevet1)
    BrevetFusion.extend(Brevet2)
        # A one-element list is collapsed to its single element.
        if isinstance(dico[clef], list) and len(dico[clef]) ==1:
            dico[clef] = dico[clef][0]
        # An empty list becomes the empty string.
        elif isinstance(dico[clef], list) and len(dico[clef]) == 0:
            dico[clef] = ''
        # Longer lists are kept, minus every '' entry.
        elif isinstance(dico[clef], list) and len(dico[clef]) >1:
            if '' in dico[clef]:
                for nb in range(dico[clef].count('')):
                    dico[clef].remove('')
        else:
            pass
    return dico

if IsEnableScript:
    GatherContent = True
    # Dedicated OPS client that asks for JSON responses.
    registered_client = epo_ops.Client(key, secret)
    registered_client.accept_type = 'application/json'

    # Walk every file of ResultBiblioPath whose name does not contain
    # 'Description'.
    for ndf in [fic2 for fic2 in os.listdir(ResultBiblioPath) if 'Description' not in fic2]:
        if ndf.startswith('Families'):
            typeSrc = 'Families'
        else:
            typeSrc = ''

        # NEW 12/12/15 new gatherer append data to pickle file in order to
        # consume less memory.
        # Both spellings are tested for membership. The former
        # `'Description'+ndf or ... in os.listdir(...)` was always true
        # because a non-empty string is truthy, so the else branch could
        # never run.
        descriptionNames = ('Description' + ndf, 'Description' + ndf.lower())
        listedNames = os.listdir(ResultListPath)
        if any(candidate in listedNames for candidate in descriptionNames):
            ficBrevet = LoadBiblioFile(ResultListPath, ndf)

        else:  # Retrocompatibility
            # Single-argument call form prints the same text on Python 2
            # and Python 3.
            print('gather your data again. sorry')
            sys.exit()
Exemple #11
0
# Ask which set of institutions to process: the country picks the workbook
# file, the sheet picks the worksheet inside it.
country = input("Country to process: ")
sheet = input("Sheet to process: ")

# Load the worksheet and keep the non-empty 'PatentFilingName' values.
inputFile = str(country) + '.xlsx'
institutions = pd.ExcelFile(inputFile)
institSheet = institutions.parse(sheet)
filingNames = list(institSheet['PatentFilingName'].dropna().values)

# One row per filing name; the single column receives the count.
countDf = pd.DataFrame(columns=['CountPatents'], index=filingNames)

# Module-level state for the API.
client = epo_ops.Client(secret='also yours', key='get your own key')  # instantiate client
dataUse = 0  # running total of the sizes returned by getCount

print('\nQueries running through search:')
# Institutions are handled one at a time.
for instit in filingNames:
    query = generateQuery(instit)  # search query for this institution
    count, size = getCount(query)  # count and size reported for the query
    countDf.loc[instit] = count
    dataUse += size

# Report the accumulated size.
print("Data volume called from OPS in this run: " + str(dataUse) + " bytes")

# Write the counts next to the script as <country>-<sheet>-patentCount.csv.
exportName = country + "-" + sheet + "-patentCount.csv"
countDf.to_csv(exportName, encoding='utf-8-sig')
Exemple #12
0
    response = client.published_data(  # Retrieve bibliography data
        reference_type='publication',  # publication, application, priority
        input=epo_ops.models.Docdb(pat_num, country,
                                   kind),  # original, docdb, epodoc
        endpoint=
        'biblio',  # optional, defaults to biblio in case of published_data
        #optional, list of constituents
    )
    tree = XMLparser(response)  #parse the xml
    bib_data = get_bibdata(tree, NS)  #get all biblo (title, dates)
    return bib_data


if __name__ == "__main__":

    # SECURITY(review): the OPS key and secret are hard-coded in source.
    # Consider loading them from the environment or an untracked config
    # file, and rotating these values.
    client = epo_ops.Client(key='62kB2O6tJtmG2RQsoOMJZUOhmbAlAkJ5',
                            secret='WpsdCAOg9GyWw8i1')  # Instantiate client

    # Fetch and print the published data and the family data of each patent.
    for patent in patent_list:
        data = published_data_api(client, patent)
        family = family_data_api(client, patent)
        # One pre-formatted argument prints "<data> <family>" on both
        # Python 2 and Python 3; the former `print data, family` statement
        # is a syntax error on Python 3.
        print('%s %s' % (data, family))

#Playground below:
'''
documents = tree.findall("./epo:exchange-documents/epo:exchange-document", NS)
for document in documents:
	i=1
	while(1):
		bib_data = document.find("./epo:bibliographic-data", NS)
		pub_date=bib_data.find("./epo:publication-reference/epo:document-id[@document-id-type='epodoc']/epo:date", NS).text
		prior_date=bib_data.find('./epo:priority-claims/epo:priority-claim[@sequence="'+str(i)+'"]/epo:document-id[@document-id-type="epodoc"]/epo:date', NS)
		if prior_date!= None:
Exemple #13
0
def OPSChercheAbstractBrevet(pat, DirStockage):
    """Fetch the OPS 'biblio' record of *pat* and build its abstracts.

    The record is first requested by EPODOC reference. When that request
    raises, is not OK, or its JSON does not mention 'abstract', the DOCDB
    reference is tried once per kind code in ``pat['kind']`` (only when that
    value is a list).

    Args:
        pat: dict with at least 'label', 'country' and 'kind' entries.
            List values under these keys are de-duplicated in place.
        DirStockage: not used by this function; kept so existing callers
            keep working.

    Returns:
        The value returned by ``MakeIram4`` for the last usable response,
        or an empty dict when no response could be used.
    """
    import epo_ops
    from epo_ops.models import Docdb
    from epo_ops.models import Epodoc

    # The credentials file must contain exactly "key,secret" (one comma).
    # The context manager closes it even when reading or unpacking fails.
    with open('../cles-epo.txt', 'r') as fic:
        key, secret = fic.read().split(',')
    key, secret = key.strip(), secret.strip()
    ops_client = epo_ops.Client(key, secret)
    ops_client.accept_type = 'application/json'

    ndb = pat['label']
    if isinstance(ndb, list):
        ndb = ndb[0]
    # Taken before the de-duplication below, so a list value keeps its
    # original first element.
    pays = pat['country']

    # Some problem (again) in cleaning data within the family gatherer
    # (22/12/15): duplicates are removed from list-valued fields. The loop
    # variable no longer shadows the API `key`.
    for champ in ('label', 'country', 'kind'):
        if isinstance(pat[champ], list):
            pat[champ] = list(set(pat[champ]))
    if isinstance(pays, list):
        pays = pays[0]

    content = 'Abstract'
    endP = 'biblio'
    Abstracts = dict()

    # First attempt: EPODOC reference made of the country followed by the
    # label without its first two characters. The reference is built
    # outside the try so that construction errors still surface.
    epodocRef = Epodoc(pays + ndb[2:])
    try:
        data = ops_client.published_data('publication', epodocRef,
                                         endpoint=endP)
        CheckDocDB = not (data.ok and 'abstract' in str(data.json()))
    except Exception:
        CheckDocDB = True

    if CheckDocDB:
        # NOTE(review): a non-list pat['kind'] is not retried here, so the
        # empty dict is returned in that case -- confirm this is intended.
        if isinstance(pat['kind'], list):
            tempoData = []
            for cc in pat['kind']:
                # hope all comes from same country
                docdbRef = Docdb(ndb[2:], pays, cc)
                try:
                    tempoData.append(
                        ops_client.published_data('publication', docdbRef,
                                                  endpoint=endP))
                except Exception:
                    # Best-effort: a kind code whose request fails is
                    # skipped. `Exception` (not a bare except) lets
                    # KeyboardInterrupt and SystemExit propagate.
                    continue
            for dat in tempoData:
                if dat is not None and dat.ok:
                    # NOTE(review): per the original author's comment,
                    # MakeIram4 is expected to format the patent as an
                    # Iramuteq-format text file in the right directory, and
                    # may return the language of the retrieved abstract --
                    # confirm.
                    Abstracts = MakeIram4(pat, dat.json(), content)
    elif data is not None and data.ok:
        # The EPODOC response already carries an abstract; use it directly.
        Abstracts = MakeIram4(pat, data.json(), content)

    return Abstracts
Exemple #14
0
    response = client.published_data(  # Retrieve bibliography data
        reference_type='publication',  # publication, application, priority
        input=epo_ops.models.Docdb('101430697', 'CN',
                                   'B'),  # original, docdb, epodoc
        endpoint=
        'biblio',  # optional, defaults to biblio in case of published_data
        #optional, list of constituents
    )
    return response


#main
if __name__ == "__main__":

    # NOTE(review): `pat` is assigned but not used in the visible part of
    # this block.
    pat = sys.argv  # to be used in case of working with arguments
    # SECURITY(review): the OPS key and secret are hard-coded in source.
    # Consider loading them from the environment or an untracked config
    # file, and rotating these values.
    client = epo_ops.Client(key='GTfPiUhprNpUavoL2B1WBT7MK0y1A3jw',
                            secret='b1D4WcgkXNXQ5VTq')  # Instantiate client

    # Get data from EPO.
    response = published_data(client)

    xml = beautify(response)  # cleaning XML

    savefile(xml)  # saving XML doc for parsing

    # NOTE(review): presumably 'data.xml' is the file written by savefile()
    # above -- confirm.
    tree = ET.parse('data.xml')
    root = tree.getroot()

    # Text of the doc-number under the epodoc document-id of the publication
    # reference; `.text` fails if no such element is found.
    pat_num = tree.find(
        './/publication-reference/document-id[@document-id-type="epodoc"]/doc-number'
    ).text  # obsolete stuff
    publication_date, priority_list = get_dates()