Example #1
0
def get_register(number):
    """Get EP Register data for a particular EP publication number
    (e.g. EP3065066).

    Returns the parsed JSON payload, or None when the lookup fails for
    any reason (best-effort contract).
    """
    # TODO: first check cached data here? - or do this externally to the function?
    try:
        register_search = registered_client.register("publication", Epodoc(number))
        return register_search.json()
    except Exception:
        # Was a bare 'except:', which would also swallow SystemExit and
        # KeyboardInterrupt; keep best-effort behaviour for real errors only.
        return None
Example #2
0
 def family(self, document_number):
     """
     Request family information for single document with number in epodoc format.

     :param document_number: publication number in epodoc format.
     :return: parsed JSON payload of the OPS family ('biblio') response.
     """
     # Lazy %-args: the message is only rendered when INFO is enabled.
     logger.info('Requesting family information for document "%s"',
                 document_number)
     response = self.client.family('publication', Epodoc(document_number),
                                   'biblio')
     return response.json()
def get_images_meta(ops_client, patent_label, path_json):
    """Return image metadata JSON for *patent_label*, with a local file cache.

    Tries to load cached JSON from *path_json* first; on a cache miss it
    queries the OPS 'images' endpoint, writes the raw response body to the
    cache file and returns the parsed JSON.  Returns None when both the
    cache and the online lookup fail.
    """
    # Fast path: serve the locally cached metadata when present and valid.
    try:
        with open(path_json) as cached:
            return json.load(cached)
    except (OSError, ValueError):
        pass

    try:
        ans = ops_client.published_data(reference_type='publication',
                                        input=Epodoc(patent_label), endpoint='images')
        # response .content is bytes, hence binary mode for the cache file.
        with open(path_json, 'wb') as out:
            out.write(ans.content)
        return ans.json()
    except Exception as err:
        print("...Image meta for {} error".format(patent_label), err)
        # A 404 means the document has no images; cache an empty JSON
        # object so we do not keep re-querying OPS for it.
        if hasattr(err, 'response') and err.response.status_code == 404:
            with open(path_json, 'w') as out:
                out.write('{}')
    return None
Example #4
0
    def register(self, document_number):
        """
        Fetch register information for a single document (epodoc format).

        An HTTP error response is still decoded, since OPS error replies
        may carry a useful JSON payload; a non-JSON body yields None.
        """
        logger.info('Requesting register information for document "{}"'.format(
            document_number))
        try:
            response = self.client.register('publication',
                                            Epodoc(document_number))
        except requests.HTTPError as ex:
            # Keep the error response: its body is decoded below.
            response = ex.response

        try:
            return response.json()
        except ValueError:
            # Body was not valid JSON (e.g. an HTML error page).
            return None
                    # Keep only the first country code when several were
                    # collected for this family member.
                    pays = pays[0]
                # One pass per text section, prefixed by the source type
                # (e.g. 'EP' -> 'EPAbstract', 'EPClaims', 'EPDescription').
                for content in [typeSrc+'Abstract', typeSrc+'Claims',typeSrc+'Description']:

                    if content not in Nombre.keys():
                        Nombre [content] = 0
                    # A missing result directory just means nothing was
                    # gathered yet for this section.
                    try:
                        lstfic = os.listdir(ResultPathContent+'//' + content)
                    except:
                        lstfic = []
                    endP= content.replace(typeSrc, "").lower()
                    # OPS serves abstracts through the 'biblio' endpoint.
                    if endP == 'abstract':
                        endP = 'biblio'
                    fichier = [fics[3:] for fics in lstfic]   # content already gathered
                    if ndb+'.txt' not in fichier: #hack here as chinese patents seems not be in claims or description endpoint
                    #, u'fulltext'
                        temp =('publication', Epodoc(pays+ndb[2:])) #, brevet[u'document-id'][u'kind']['$']))
                        try:
                            data = registered_client.published_data(*temp, endpoint = endP)             #registered_client.published_data()
                            # Fall back to Docdb lookups below when the
                            # Epodoc answer lacks the wanted section.
                            if data.ok and content.replace(typeSrc, "").lower() in str(data.json()):
                                CheckDocDB = False
                            else:
                                CheckDocDB = True
                        except:
                            CheckDocDB = True
                        if CheckDocDB:
                            if isinstance(brevet[u'kind'], list):
                                tempoData = []
                                # Retry once per kind code of the document.
                                for cc in brevet[u'kind']:
                                    temp =('publication', Docdb(ndb[2:],pays, cc)) # hope all comes from same country
                                    try:
                                        tempoData.append(registered_client.published_data(*temp, endpoint = endP))
    pass
#os.chdir(ndf.replace('.dump', ''))
# Counters for gathered descriptions, claims and fulltexts.
desc, clm, ft = 0,0,0
if GatherContent:

    for brevet in lstBrevet:
        #tempo =('publication', Docdb(,, ))
        #if brevet['label'] == 'FR2997041':
        ndb =brevet[u'label']#[u'document-id'][u'country']['$']+brevet[u'document-id'][u'doc-number']['$']brevet['publication-ref'][u'document-id'][0][u'kind']['$'])
#check for already gathered patents
        lstfic =[] #alreadycollected
        for content in [u'claims', u'description']:
            lstfic += os.listdir(ResultPathContent+'//'+content+'//')
        # File names carry a 3-character prefix before the patent number.
        fichier = [fics[3:] for fics in lstfic]
        if fichier.count(ndb+'.txt') < 2: #one or both files claim or desc are missing
            # Build both reference flavours: Epodoc (full label) and
            # Docdb (number, country, kind/status).
            tmp = Epodoc(ndb)
            tempo2 = ('publication', tmp)
            tmp = Docdb(ndb[2:], ndb[0:2],brevet['status'])
            tempo = ('publication', tmp)


            ndb =brevet[u'label']#[u'document-id'][u'country']['$']+brevet[u'document-id'][u'doc-number']['$']brevet['publication-ref'][u'document-id'][0][u'kind']['$'])
            if True:  #avoid check of chinese patents since they aren't described in english
                for content in [u'claims', u'description']: #, u'fulltext'
                    # Create the destination directory on first use.
                    if content not in os.listdir(RepDir):
                        os.makedirs(RepDir +'//' +content)
                          # optional, list of constituents

                    try :
                        data = registered_client.published_data(*tempo, endpoint = content)
                        #registered_client.published_data()
Example #7
0
def test_epodoc_as_api_input():
    # Each case: constructor arguments -> expected rendered API input.
    cases = [
        (["US08/921,321", "B2", "20140122"],
         "(US08/921%2C321).(B2).(20140122)"),
        # An empty kind code is omitted from the rendered input.
        (["US08/921,321", "", "20140122"],
         "(US08/921%2C321).(20140122)"),
    ]
    for params, expected in cases:
        assert Epodoc(*params).as_api_input() == expected
Example #8
0
def test_epodoc_required():
    # An empty document number must be rejected by the Epodoc constructor.
    with raises(MissingRequiredValue):
        Epodoc("")
Example #9
0
                ]:

                    if content not in Nombre.keys():
                        Nombre[content] = 0
                    # A missing result directory just means nothing was
                    # gathered yet for this section.
                    try:
                        lstfic = os.listdir(ResultPathContent + '//' + content)
                    except:
                        lstfic = []
                    endP = content.replace(typeSrc, "").lower()
                    # OPS serves abstracts through the 'biblio' endpoint.
                    if endP == 'abstract':
                        endP = 'biblio'
                    fichier = [fics[3:]
                               for fics in lstfic]  # content already gathered
                    if ndb + '.txt' not in fichier:  #hack here as chinese patents seem not to be in claims or description endpoint
                        #, u'fulltext'
                        temp = ('publication', Epodoc(pays + ndb[2:])
                                )  #, brevet[u'document-id'][u'kind']['$']))
                        try:
                            data = ops_client.published_data(
                                *temp,
                                endpoint=endP)  #ops_client.published_data()
                            # Fall back to Docdb lookups when the Epodoc
                            # answer lacks the wanted section.
                            if data.ok and content.replace(
                                    typeSrc, "").lower() in str(data.json()):
                                CheckDocDB = False
                            else:
                                CheckDocDB = True
                        except Exception as err:
                            CheckDocDB = True
                        if CheckDocDB:
                            if isinstance(brevet[u'kind'], list):
                                tempoData = []
Example #10
0
def OPSChercheAbstractBrevet(pat, DirStockage):
    """Fetch the abstract of patent *pat* from EPO OPS.

    Reads the OPS key/secret from '../cles-epo.txt', queries the 'biblio'
    endpoint (Epodoc reference first, then one Docdb lookup per kind code
    when the Epodoc answer has no abstract) and formats the result through
    MakeIram4.  Returns a dict of abstracts (empty when nothing was found).

    NOTE(review): *DirStockage* is currently unused here -- confirm whether
    MakeIram4 is expected to receive it.
    """
    import epo_ops
    from epo_ops.models import Docdb
    from epo_ops.models import Epodoc

    # Credentials are stored as "key,secret" on a single line.
    with open('../cles-epo.txt', 'r') as fic:
        key, secret = fic.read().split(',')
    key, secret = key.strip(), secret.strip()
    ops_client = epo_ops.Client(key, secret)
    ops_client.accept_type = 'application/json'

    ndb = pat['label']  # patent number, e.g. 'FR2997041'
    Abstracts = dict()
    if isinstance(ndb, list):
        ndb = ndb[0]
    pays = pat['country']

    # The family gatherer sometimes leaves duplicated values; deduplicate.
    # Loop variable renamed so it no longer shadows the OPS api 'key'.
    for champ in ['label', 'country', 'kind']:
        if isinstance(pat[champ], list):
            pat[champ] = list(set(pat[champ]))
    if isinstance(pays, list):
        pays = pays[0]

    content = 'Abstract'
    endP = 'biblio'

    data = None  # initialised so the 'else' branch below can test it safely
    temp = ('publication', Epodoc(pays + ndb[2:]))
    try:
        data = ops_client.published_data(*temp, endpoint=endP)
        # Only trust the Epodoc answer when it actually carries an abstract.
        if data.ok and 'abstract' in str(data.json()):
            CheckDocDB = False
        else:
            CheckDocDB = True
    except Exception:
        CheckDocDB = True

    if CheckDocDB:
        # Epodoc lookup failed or was empty: retry with Docdb references,
        # once per kind code of the document.
        if isinstance(pat['kind'], list):
            tempoData = []
            for cc in pat['kind']:
                # hope all family members come from the same country
                temp = ('publication', Docdb(ndb[2:], pays, cc))
                try:
                    tempoData.append(
                        ops_client.published_data(*temp, endpoint=endP))
                except Exception:
                    data = None
            for dat in tempoData:
                if dat is not None and dat.ok:
                    contenu = content
                    patentCont = dat.json()
                    # MakeIram4 writes the abstract as an Iramuteq-format
                    # text file and returns the abstracts dict.
                    Abstracts = MakeIram4(pat, patentCont, contenu)
        # NOTE(review): when 'kind' is not a list, nothing is retried and
        # an empty dict is returned -- confirm this is intended.
    else:
        # NOTE(review): 'temp' is rebuilt here but no new request is made;
        # the Epodoc response kept in 'data' is reused -- likely a leftover.
        temp = ('publication',
                Docdb(pat['label'][2:], pat['country'], pat['kind']))
        if data is not None and data.ok:
            contenu = content
            patentCont = data.json()
            Abstracts = MakeIram4(pat, patentCont, contenu)

    return Abstracts
Example #11
0
import re

import requests

from epo_ops.models import Docdb
from epo_ops.models import Epodoc
from epo_ops.models import Original

# Shared request fixtures: the same EP document in both reference formats.
data = ('publication', Docdb('1000000', 'EP', 'A1'))  # docdb-format reference
rdata = ('publication', Epodoc('EP1000000'))  # epodoc-format reference


def find_range(document, pattern):
    """Return the first match of 'range' followed by *pattern*, or None."""
    needle = "range.*{0}".format(pattern)
    return re.search(needle, document)


def assert_request_success(response):
    """Check that *response* is a 200 OK answered by OPS API v3.1."""
    expected_api = 'ops-v3.1'
    assert response.status_code == requests.codes.ok
    assert response.headers['X-API'] == expected_api


def assert_family_success(client):
    """Issue a family request and verify it returned patent-family data."""
    resp = client.family(*data)
    assert_request_success(resp)
    assert 'patent-family' in resp.text
    return resp


def issue_published_data_request(client):
    """Fire a published-data request using the shared docdb fixture."""
    return client.published_data(*data)
    


if GatherBibli and GatherBiblio:
    registered_client = epo_ops.RegisteredClient(key, secret)
    #        data = registered_client.family('publication', , 'biblio')
    registered_client.accept_type = 'application/json'


    for brevet in lstBrevets:

        YetGathered = [u['label'] for u in BiblioPatents]
        # may be current patent has already be gathered in a previous attempt
        # should add a condition here to check in os.listdir()
        # Docdb reference: (doc-number, country, kind).
        tempo =('publication', Docdb(brevet[u'document-id'][u'doc-number']['$'],brevet[u'document-id'][u'country']['$'], brevet[u'document-id'][u'kind']['$']))
        # Epodoc reference: country code concatenated with doc-number.
        tempo2 =('publication', Epodoc(brevet[u'document-id'][u'country']['$']+brevet[u'document-id'][u'doc-number']['$']))#, brevet[u'document-id'][u'kind']['$']))

        ndb =brevet[u'document-id'][u'country']['$']+brevet[u'document-id'][u'doc-number']['$'] #nameOfPatent
        if ndb not in YetGathered:
             try: #trying Epodoc first, unused due to response format (multi document instead of one only)
                 data = registered_client.published_data(*tempo2, endpoint = 'biblio')
                 patentBib = data.json()
                 data2 = registered_client.published_data(*tempo, endpoint = 'biblio')
                 # Keep whichever of the two answers carries more content.
                 if data.ok and data2.ok:
                     patentBibtemp = data.json()
                     patentBibtemp2= data2.json()
                     if len(str(patentBibtemp)) > len(str(patentBibtemp2)):
                         patentBib = patentBibtemp
                     else:
                         patentBib = patentBibtemp2
             except:
Example #13
0
def test_epodoc_as_api_input():
    """Epodoc fields render as dot-joined, parenthesised, URL-quoted parts."""
    full = Epodoc('US08/921,321', 'B2', '20140122')
    assert full.as_api_input() == '(US08/921%2C321).(B2).(20140122)'

    # An empty kind code is dropped from the rendered input.
    no_kind = Epodoc('US08/921,321', '', '20140122')
    assert no_kind.as_api_input() == '(US08/921%2C321).(20140122)'
def assert_bulk_service_retrival_success(client):
    """Request two documents in one bulk call and expect a 200 answer."""
    # One docdb- and one epodoc-formatted reference in a single request.
    refs = [Docdb("1000000", "EP", "A1"), Epodoc("US2018265402")]
    resp = client.published_data("publication", input=refs)

    assert resp.status_code == requests.codes.ok
import re

import requests

from epo_ops.models import Docdb, Epodoc, Original

# Shared request fixtures: the same EP document in both reference formats.
data = ("publication", Docdb("1000000", "EP", "A1"))  # docdb-format reference
rdata = ("publication", Epodoc("EP1000000"))  # epodoc-format reference
idata = ("published-data/images/EP/1000000/A1/fullimage", 1)
# idata path is the result @path from images published-data json request


def find_range(document, pattern):
    """Search *document* for a 'range' marker followed by *pattern*."""
    return re.search(f"range.*{pattern}", document)


def assert_request_success(response):
    """Check that *response* is a 200 OK answered by OPS API v3.2."""
    expected_api = "ops-v3.2"
    assert response.status_code == requests.codes.ok
    assert response.headers["X-API"] == expected_api


def assert_family_success(client):
    """Run a family lookup and check the reply contains family data."""
    reply = client.family(*data)
    assert_request_success(reply)
    assert "patent-family" in reply.text
    return reply


def assert_family_biblio_success(client):
    response = client.family(*data, constituents=["biblio"])
    assert_request_success(response)