Beispiel #1
0
def test_docdb_required():
    """Docdb must raise MissingRequiredValue for each incomplete argument set."""
    incomplete_inputs = (
        ('123', None, None),  # only the first argument is supplied
        ('123', '345', ''),   # third argument is an empty string
        ('', None, None),     # nothing usable is supplied at all
    )
    for arguments in incomplete_inputs:
        with raises(MissingRequiredValue):
            Docdb(*arguments)
Beispiel #2
0
def test_docdb_required():
    """Verify that constructing Docdb with missing values is rejected.

    Each case below is expected to raise MissingRequiredValue; the cases
    are checked in order and the first one that does not raise fails the
    test.
    """
    for first, second, third in [
        ("123", None, None),
        ("123", "345", ""),
        ("", None, None),
    ]:
        with raises(MissingRequiredValue):
            Docdb(first, second, third)
Beispiel #3
0
os.chdir(ndf.replace('.dump', ''))
desc, clm, ft = 0, 0, 0
for brevet in lstBrevets:
    #tempo =('publication', Docdb(,, ))
    ndb = brevet[u'document-id'][u'country']['$'] + brevet[u'document-id'][
        u'doc-number']['$']
    for content in [u'claims', u'description', u'fulltext']:
        if content not in filter(os.path.isdir, os.listdir(os.getcwd())):
            os.makedirs(content)
            # optional, list of constituents

        try:
            data = registered_client.published_data(
                reference_type='publication',
                input=Docdb(brevet[u'document-id'][u'doc-number']['$'],
                            brevet[u'document-id'][u'country']['$'],
                            brevet[u'document-id'][u'kind']['$']),
                endpoint=content,
                constituents=[])
            if data.status_code == 403:
                #making necessary redirections
                print data
            if data.ok:
                patentCont = data.json()

                # which language?
                # TODO: the following could be factored out into a helper
                if content == 'description':
                    description = []
                    description = patentCont[u'ops:world-patent-data'][
                        u'ftxt:fulltext-documents'][u'ftxt:fulltext-document'][
                    if ndb+'.txt' not in fichier: #hack here as chinese patents seems not be in claims or description endpoint
                    #, u'fulltext'
                        temp =('publication', Epodoc(pays+ndb[2:])) #, brevet[u'document-id'][u'kind']['$']))
                        try:
                            data = registered_client.published_data(*temp, endpoint = endP)             #registered_client.published_data()
                            if data.ok and content.replace(typeSrc, "").lower() in str(data.json()):
                                CheckDocDB = False
                            else:
                                CheckDocDB = True
                        except:
                            CheckDocDB = True
                        if CheckDocDB:
                            if isinstance(brevet[u'kind'], list):
                                tempoData = []
                                for cc in brevet[u'kind']:
                                    temp =('publication', Docdb(ndb[2:],pays, cc)) # hope all comes from same country
                                    try:
                                        tempoData.append(registered_client.published_data(*temp, endpoint = endP))
                                    except:
                                        data = None
                                        pass
                                for dat in tempoData:
                                    if dat is not None and dat.ok: #doing the same for all content. This may result in redundancy
                                        contenu = content.replace(typeSrc, "").lower()

                                        patentCont = dat.json()
                                        Langs = MakeIram2(brevet, ndb +'.txt', patentCont, RepDir+ '//'+ typeSrc + contenu+'//', contenu)
                                        if endP == 'biblio':
                                            for contenu in ['claims', 'description']:
                                                Langs = MakeIram2(brevet, ndb +'.txt', patentCont, RepDir+ '//'+ typeSrc + contenu+'//', contenu)
                            else:
Beispiel #5
0
def issue_request(client):
    """Issue a published-data request for a placeholder Docdb document.

    The Docdb input is built from the literal strings "Quota", "Forbidden"
    and "exceeded"; the client's response is returned unchanged.
    """
    send = client.published_data
    response = send("publication", Docdb("Quota", "Forbidden", "exceeded"))
    return response
desc, clm, ft = 0,0,0
if GatherContent:

    for brevet in lstBrevet:
        #tempo =('publication', Docdb(,, ))
        #if brevet['label'] == 'FR2997041':
        ndb =brevet[u'label']#[u'document-id'][u'country']['$']+brevet[u'document-id'][u'doc-number']['$']brevet['publication-ref'][u'document-id'][0][u'kind']['$'])
#check for already gathered patents    
        lstfic =[] #alreadycollected
        for content in [u'claims', u'description']:        
            lstfic += os.listdir(ResultPathContent+'//'+content+'//')
        fichier = [fics[3:] for fics in lstfic]      
        if fichier.count(ndb+'.txt') < 2: #one or both files claim or desc are missing
            tmp = Epodoc(ndb)
            tempo2 = ('publication', tmp)
            tmp = Docdb(ndb[2:], ndb[0:2],brevet['status'])
            tempo = ('publication', tmp)
                                           
            
            ndb =brevet[u'label']#[u'document-id'][u'country']['$']+brevet[u'document-id'][u'doc-number']['$']brevet['publication-ref'][u'document-id'][0][u'kind']['$'])
            if True:  #avoid check of chinese patents since they aren't descibed in english
                for content in [u'claims', u'description']: #, u'fulltext'
                    if content not in os.listdir(RepDir):
                        os.makedirs(RepDir +'//' +content)
                          # optional, list of constituents
            
                    try :
                        data = registered_client.published_data(*tempo, endpoint = content)
                        #registered_client.published_data()
                        if data.ok:
                            patentCont=data.json()
Beispiel #7
0
def test_docdb_as_api_input():
    """Docdb.as_api_input() must render its arguments in the expected form.

    In the expected string the second argument comes first and the comma in
    the first argument appears percent-encoded as ``%2C``.
    """
    first, second, third, fourth = "US08/921,321", "CC", "B2", "20140122"
    document = Docdb(first, second, third, fourth)
    rendered = document.as_api_input()
    assert rendered == "(CC).(US08/921%2C321).(B2).(20140122)"
Beispiel #8
0
                                *temp,
                                endpoint=endP)  #ops_client.published_data()
                            if data.ok and content.replace(
                                    typeSrc, "").lower() in str(data.json()):
                                CheckDocDB = False
                            else:
                                CheckDocDB = True
                        except Exception as err:
                            CheckDocDB = True
                        if CheckDocDB:
                            if isinstance(brevet[u'kind'], list):
                                tempoData = []
                                for cc in brevet[u'kind']:
                                    temp = (
                                        'publication',
                                        Docdb(ndb[2:], pays, cc)
                                    )  # hope all comes from same country
                                    try:
                                        tempoData.append(
                                            ops_client.published_data(
                                                *temp, endpoint=endP))
                                    except:
                                        data = None
                                        pass
                                for dat in tempoData:
                                    if dat is not None and dat.ok:  #doing the same for all content. This may result in redundancy
                                        contenu = content.replace(typeSrc, "")

                                        patentCont = dat.json()
                                        Langs = MakeIram2(
                                            brevet, ndb + '.txt', patentCont,
Beispiel #9
0
def OPSChercheAbstractBrevet(pat, DirStockage):
    """Query EPO OPS for a patent's ``biblio`` record and extract its abstract.

    The patent is first requested in Epodoc form.  When that request raises,
    is not OK, or its JSON payload does not contain the text ``abstract``,
    the request is retried in Docdb form, once per kind code found in
    ``pat['kind']``.

    Args:
        pat: dict describing the patent.  The keys ``'label'``,
            ``'country'`` and ``'kind'`` are read; list values under these
            keys are de-duplicated in place.
        DirStockage: not used by this function; kept so existing callers
            keep working.

    Returns:
        The value returned by ``MakeIram4`` for the last usable response, or
        an empty dict when no request produced a usable response.

    Raises:
        IOError/OSError: if the credentials file ``../cles-epo.txt`` cannot
            be read.
        ValueError: if that file does not contain exactly one comma.
    """
    import epo_ops
    from epo_ops.models import Docdb
    from epo_ops.models import Epodoc

    # The credentials file holds "key,secret".  The context manager makes
    # sure the handle is released even if reading or splitting fails.
    with open('../cles-epo.txt', 'r') as fic:
        key, secret = fic.read().split(',')
    key, secret = key.strip(), secret.strip()
    ops_client = epo_ops.Client(key, secret)
    ops_client.accept_type = 'application/json'

    ndb = pat['label']
    Abstracts = dict()
    if isinstance(ndb, list):
        ndb = ndb[0]
    pays = pat['country']

    # Remove duplicates from list-valued fields (the original author notes
    # a data-cleaning problem in the family gatherer, dated 22/12/15).
    for field in ['label', 'country', 'kind']:
        if isinstance(pat[field], list):
            pat[field] = list(set(pat[field]))
    if isinstance(pays, list):
        pays = pays[0]

    content = 'Abstract'
    endP = 'biblio'

    # First attempt: Epodoc form (country code + label without its first
    # two characters).
    temp = ('publication', Epodoc(pays + ndb[2:]))
    try:
        data = ops_client.published_data(*temp, endpoint=endP)
        CheckDocDB = not (data.ok and 'abstract' in str(data.json()))
    except Exception:
        # Any failure of the Epodoc request triggers the Docdb fallback.
        CheckDocDB = True

    if not CheckDocDB:
        # The Epodoc response is OK and mentions an abstract: use it
        # directly.  (The original built an unused Docdb object here, which
        # could raise and discard this successful response.)
        return MakeIram4(pat, data.json(), content)

    # Fallback: Docdb form, one request per kind code.  A scalar kind is
    # treated as a one-element list so the fallback is not silently
    # skipped; an empty/None kind yields no request at all.
    kinds = pat['kind']
    if not isinstance(kinds, list):
        kinds = [kinds] if kinds else []

    tempoData = []
    for cc in kinds:
        # The original author hopes all kind codes come from the same country.
        temp = ('publication', Docdb(ndb[2:], pays, cc))
        try:
            tempoData.append(ops_client.published_data(*temp, endpoint=endP))
        except Exception:
            # Best effort: a failing kind code must not stop the others.
            continue

    for dat in tempoData:
        if dat is not None and dat.ok:
            # Later usable responses overwrite earlier ones.
            Abstracts = MakeIram4(pat, dat.json(), content)

    # Original note: ops:world-patent-data exchange-documents
    # exchange-documents abstract
    return Abstracts
Beispiel #10
0
import re

import requests

from epo_ops.models import Docdb
from epo_ops.models import Epodoc
from epo_ops.models import Original

# (reference type, input model) pairs; `data` is unpacked into client calls
# by helpers in this module.
# NOTE(review): `rdata` looks like the same document in Epodoc form - confirm.
data = ('publication', Docdb('1000000', 'EP', 'A1'))
rdata = ('publication', Epodoc('EP1000000'))


def find_range(document, pattern):
    """Search *document* for ``range`` followed, on the same line, by *pattern*.

    *pattern* is formatted into the regular expression as-is, so it is
    interpreted as regex syntax.  Returns the match object, or ``None`` when
    nothing matches.
    """
    expression = re.compile("range.*" + format(pattern))
    return expression.search(document)


def assert_request_success(response):
    """Assert that *response* is OK and carries the ``ops-v3.1`` API header."""
    status = response.status_code
    assert status == requests.codes.ok
    api_version = response.headers['X-API']
    assert api_version == 'ops-v3.1'


def assert_family_success(client):
    """Request the family for the module-level ``data`` and check the reply.

    The response must pass ``assert_request_success`` and its text must
    contain ``patent-family``.  The response is returned to the caller.
    """
    family_response = client.family(*data)
    assert_request_success(family_response)
    body = family_response.text
    assert 'patent-family' in body
    return family_response


def issue_published_data_request(client):
    """Call ``client.published_data`` with the module-level ``data`` arguments."""
    response = client.published_data(*data)
    return response
        return None, DejaLa, BiblioPatents
    


if GatherBibli and GatherBiblio:
    registered_client = epo_ops.RegisteredClient(key, secret)
    #        data = registered_client.family('publication', , 'biblio')
    registered_client.accept_type = 'application/json'  
    
    
    for brevet in lstBrevets:
        
        YetGathered = [u['label'] for u in BiblioPatents]
        # the current patent may already have been gathered in a previous attempt
        # TODO: add a condition here that also checks os.listdir()
        tempo =('publication', Docdb(brevet[u'document-id'][u'doc-number']['$'],brevet[u'document-id'][u'country']['$'], brevet[u'document-id'][u'kind']['$']))
        tempo2 =('publication', Epodoc(brevet[u'document-id'][u'country']['$']+brevet[u'document-id'][u'doc-number']['$']))#, brevet[u'document-id'][u'kind']['$']))
       
        ndb =brevet[u'document-id'][u'country']['$']+brevet[u'document-id'][u'doc-number']['$'] #nameOfPatent
        if ndb not in YetGathered:      
             try: #trying Epodoc first, unused due to response format (multi document instead of one only)
                 data = registered_client.published_data(*tempo2, endpoint = 'biblio')
                 patentBib = data.json()
                 data2 = registered_client.published_data(*tempo, endpoint = 'biblio')
                 if data.ok and data2.ok:
                     patentBibtemp = data.json()
                     patentBibtemp2= data2.json()
                     if len(str(patentBibtemp)) > len(str(patentBibtemp2)):
                         patentBib = patentBibtemp
                     else:
                         patentBib = patentBibtemp2
def issue_request(client):
    """Request published data for a Docdb built from placeholder strings.

    Returns whatever ``client.published_data`` returns.
    """
    published_data = client.published_data
    return published_data('publication', Docdb('Quota', 'Forbidden', 'exceeded'))
Beispiel #13
0
def test_docdb_as_api_input():
    """Check the string produced by Docdb.as_api_input() for a sample input."""
    docdb = Docdb('US08/921,321', 'CC', 'B2', '20140122')
    actual = docdb.as_api_input()
    # The comma of the first argument is expected percent-encoded (%2C).
    assert actual == '(CC).(US08/921%2C321).(B2).(20140122)'
        brevet = CleanPatent(brevet)
        brevet = CleanPatent(brevet)
        ndb =brevet[u'label']#[u'document-id'][u'country']['$']+brevet[u'document-id'][u'doc-number']['$']brevet['publication-ref'][u'document-id'][0][u'kind']['$'])
#check for already gathered patents        
        lstfic = os.listdir(ResultPathContent+'//Abstracts/')
        fichier = [fics[3:] for fics in lstfic]      
        if ndb+'.txt' not in fichier:
            for content in [u'abstract']:#claims', u'description']: #, u'fulltext'              
                try :
                    
                    tmp = Epodoc(ndb)
                    
                    tempo = ('publication', tmp)
                    data = registered_client.published_data(*tempo, endpoint = content)             #registered_client.published_data()
                    if 'abstract' not in str(data.json()):
                        tmp = Docdb(ndb[2:], ndb[0:2],brevet['status'])
                        tempo = ('publication', tmp)
                        data = registered_client.published_data(*tempo, endpoint = content)
                                               #brevet['publication-ref'][u'document-id'][0][u'doc-number']['$'],#brevet[u'document-id'][u'doc-number']['$'], 
                                #brevet['publication-ref'][u'document-id'][0][u'country']['$'],#brevet[u'document-id'][u'country']['$'], 
                                #brevet['publication-ref'][u'document-id'][0][u'kind']['$']), endpoint = content, constituents = [])
                except:
                    try:
                        tmp = Docdb(ndb[2:], ndb[0:2],brevet['status'])
                        tempo = ('publication', tmp)
                        data = registered_client.published_data(*tempo, endpoint = content)         #registered_client.published_data()

                    except:#from there totally fun... may be we do not get there...
                        try:
#                            print 'yes we get'
                            tmp = Epodoc(brevet['publication-ref'][u'document-id'][1][u'doc-number']['$'])
def assert_bulk_service_retrival_success(client):
    """Request published data for two documents at once and assert an OK status."""
    documents = [
        Docdb("1000000", "EP", "A1"),
        Epodoc("US2018265402"),
    ]
    bulk_response = client.published_data("publication", input=documents)

    status = bulk_response.status_code
    assert status == requests.codes.ok
import re

import requests

from epo_ops.models import Docdb, Epodoc, Original

# Shared request arguments; `data` is unpacked into client.family(...) by
# helpers in this module.
data = ("publication", Docdb("1000000", "EP", "A1"))
rdata = ("publication", Epodoc("EP1000000"))
idata = ("published-data/images/EP/1000000/A1/fullimage", 1)
# The idata path is the "@path" value found in the JSON result of an images
# published-data request.


def find_range(document, pattern):
    """Return the first match of ``range.*<pattern>`` in *document*, or None.

    *pattern* is inserted into the regular expression without escaping.
    """
    regex = "".join(["range.*", "{0}".format(pattern)])
    match = re.search(regex, document)
    return match


def assert_request_success(response):
    """Assert an OK status code and an ``X-API`` header equal to ``ops-v3.2``."""
    status_code = response.status_code
    assert status_code == requests.codes.ok
    reported_api = response.headers["X-API"]
    assert reported_api == "ops-v3.2"


def assert_family_success(client):
    """Fetch the family for the module-level ``data`` and validate the response.

    Checks the response with ``assert_request_success``, requires the text
    ``patent-family`` in the body, and returns the response.
    """
    result = client.family(*data)
    assert_request_success(result)
    text = result.text
    assert "patent-family" in text
    return result


def assert_family_biblio_success(client):
    response = client.family(*data, constituents=["biblio"])
    assert_request_success(response)