Exemple #1
0
def _merge_classification(target, extracted):
    """Fold one parsed classification record into ``target`` in place.

    For every key of ``extracted`` the matching entry of ``target`` is kept
    as a list without duplicates.  A missing entry, or one that is not
    already a list (for instance the ``''`` placeholder), is started afresh
    as an empty list; values accumulated from earlier classifications are
    never discarded.
    """
    for key in extracted:
        bucket = target.get(key)
        if not isinstance(bucket, list):
            bucket = target[key] = []
        value = extracted[key]
        for item in (value if isinstance(value, list) else [value]):
            if item not in bucket:
                bucket.append(item)


def _join_codes(codes):
    """Return ``codes`` joined with '-' when it is a list, else unchanged."""
    if isinstance(codes, list):
        return '-'.join(codes)
    return codes


def GetFamilly(client, brev, rep):
    """Collect bibliographic data for every member of a patent's family.

    The family is requested through ``client.family`` first with an Epodoc
    reference built from ``brev['label']`` and, if that fails for any reason,
    with a Docdb reference built from ``brev['label']`` and
    ``brev['portee']``.  For each family member a dictionary is built
    (label, title, inventors, applicants, country, kind, classification
    keys, date, citation count, ...) and the member's abstracts are written
    through ``EcritContenu`` under ``rep + '//FamiliesAbstracts//'``.

    Parameters:
        client: object exposing ``family(...)`` whose result has ``json()``.
        brev: mapping with at least ``'label'`` (and ``'portee'`` for the
            fallback request).
        rep: base directory (string) used to build the abstract file paths.

    Returns:
        A list of member dictionaries, each carrying a ``'prior'`` key, or
        ``None`` when neither request succeeded.  Members for which no label
        can be read, or whose data contains a ``None`` value, are left out.

    Relies on module-level helpers that are not defined in this function:
    ``ExtraitTitleEn``, ``ExtraitParties``, ``ExtraitCountry``,
    ``ExtraitKind``, ``ExtraitIPCR2``, ``ExtractionDate``, ``coupeEnMots``
    and ``EcritContenu``.
    """
    from OPS2NetUtils2 import ExtractClassificationSimple2, SeparateCountryField, ExtractAbstract, UniClean
    import epo_ops
    import datetime

    lstres = []
    famille = u'ops:patent-family'
    membre = u'ops:family-member'

    try:
        reponse = client.family('publication', epo_ops.models.Epodoc(brev['label']), 'biblio')
        dico = reponse.json()[u'ops:world-patent-data'][famille][membre]
    except Exception:
        # NOTE(review): unlike the first request, this fallback does not ask
        # for the 'biblio' constituent -- confirm whether that is intended.
        try:
            reponse = client.family('publication', epo_ops.models.Docdb(brev['label'][2:], brev['label'][0:2], brev['portee']))
            dico = reponse.json()[u'ops:world-patent-data'][famille][membre]
        except Exception:
            # Single-argument print calls behave identically on Python 2 and 3.
            print("nothing found for  " + str(brev))
            print("ignoring")
            return None

    if dico is None:
        return lstres
    if isinstance(dico, dict):
        # A family with a single member is decoded as a dict, not a list.
        dico = [dico]

    lookup_errors = (KeyError, IndexError, TypeError)

    for donnee in dico:
        try:
            label = donnee[u'exchange-document'][u'bibliographic-data'][u'publication-reference'][u'document-id'][1][u'doc-number'][u'$']
        except lookup_errors:
            try:
                label = donnee[u'publication-reference'][u'document-id'][1][u'doc-number']['$']
            except lookup_errors:
                print("no label ?")
                continue

        # The Extrait* helpers are fed the member wrapped in this nesting.
        Req = {
            u'ops:world-patent-data': {
                'ops:biblio-search': {
                    'ops:search-result': {u'exchange-documents': donnee},
                },
            },
        }

        PatentData = dict()
        PatentData[u'label'] = label
        PatentData[u'titre'] = UniClean(ExtraitTitleEn(Req))
        PatentData[u'inventeur'] = UniClean(ExtraitParties(Req, 'inventor', 'epodoc'))
        PatentData[u'applicant'] = UniClean(ExtraitParties(Req, 'applicant', 'epodoc'))
        PatentData[u'pays'] = ExtraitCountry(Req)
        PatentData[u'portee'] = ExtraitKind(Req)

        try:
            classification = ExtraitIPCR2(Req)
        except Exception:
            classification = ''
        if isinstance(classification, list):
            for classif in classification:
                _merge_classification(PatentData, ExtractClassificationSimple2(classif))
        elif classification != '':
            _merge_classification(PatentData, ExtractClassificationSimple2(classification))
        # The raw classification is never part of the result.
        PatentData.pop(u'classification', None)

        PatentData = SeparateCountryField(PatentData)

        date = ExtractionDate(Req)  # priority claim first date time
        if date is not None and date != "":
            PatentData[u'date'] = date[0:4] + '-' + date[4:6] + '-' + date[6:]
            PatentData[u'dateDate'] = datetime.date(int(date[0:4]), int(date[4:6]), int(date[6:]))
        else:
            today = datetime.date.today()
            PatentData[u'dateDate'] = today
            PatentData[u'date'] = str(today.year) + '-' + str(today.month) + '-' + str(today.day)

        # Always record a citation count, also when 'references-cited' is
        # present without any 'citation' entry.
        try:
            citation = donnee[u'exchange-document'][u'bibliographic-data'][u'references-cited'][u'citation']
        except lookup_errors:
            PatentData[u'citations'] = 0
        else:
            # Presumably a lone citation is decoded as a dict, like a lone
            # family member above; count it as one instead of counting keys.
            PatentData[u'citations'] = 1 if isinstance(citation, dict) else len(citation)

        # NOTE(review): the two flags below are deliberately left unset when
        # the indicator is readable but different from 'YES'; the 'prior'
        # selection at the end keys off the presence of 'representative'.
        try:
            if donnee[u'priority-claim'][u'priority-active-indicator']['$'] == u'YES':
                PatentData['priority-active-indicator'] = 1
        except lookup_errors:
            PatentData['priority-active-indicator'] = 0
        try:
            if donnee[u'application-reference'][u'@is-representative'] == u'YES':
                PatentData['representative'] = 1
        except lookup_errors:
            PatentData[u'representative'] = 0

        PatentData[u'family lenght'] = len(dico)

        if None in PatentData.values():
            # Missing values: this member is left out of the result.
            continue

        IRAM = ('**** *Label_' + PatentData[u'label']
                + ' *Country_' + PatentData[u'pays']
                + ' *CIB3_' + _join_codes(PatentData.get(u'IPCR3', ''))
                + ' *CIB1_' + _join_codes(PatentData.get(u'IPCR1', ''))
                + ' *CIB4_' + _join_codes(PatentData.get(u'IPCR4', ''))
                + ' *Date_' + str(PatentData[u'dateDate'].year)
                + ' *Applicant_' + '-'.join(coupeEnMots(str(PatentData[u'applicant']))))

        # Members located through the second label path may carry no
        # 'exchange-document' at all; they simply have no abstract.
        documents = donnee.get(u'exchange-document', [])
        if not isinstance(documents, list):
            documents = [documents]
        TXT = dict()
        for document in documents:
            if 'abstract' in document:
                txtTemp = ExtractAbstract(document['abstract'])
                for cleLang in txtTemp:
                    if cleLang in TXT:
                        TXT[cleLang] += txtTemp[cleLang]
                    else:
                        TXT[cleLang] = txtTemp[cleLang]
        for lang in TXT:
            EcritContenu(IRAM + ' *Contenu_Abstract \n' + TXT[lang],
                         rep + '//FamiliesAbstracts//' + lang + '-' + PatentData['label'] + '.txt')

        lstres.append(PatentData)

    # 'prior' is the label of the earliest-dated member carrying a
    # 'representative' key, defaulting to the requested patent itself.
    prior = brev['label']
    datemin = datetime.date(3000, 1, 1)
    for brevet in lstres:
        if 'representative' in brevet and brevet['dateDate'] < datemin:
            datemin = brevet['dateDate']
            prior = brevet['label']
    for brevet in lstres:
        brevet['prior'] = prior
    return lstres
                            TXT=dict()
                            if isinstance(patentCont[u'ops:world-patent-data'][u'exchange-documents'][u'exchange-document'], list):
                                for tempo in  patentCont[u'ops:world-patent-data'][u'exchange-documents'][u'exchange-document']:
                                    if tempo.has_key('abstract'):
                                        txtTemp = ExtractAbstract(tempo['abstract'])
                                        for cleLang in txtTemp:
                                            if TXT.has_key(cleLang):
                                                TXT[cleLang] += txtTemp[cleLang]
                                            else:
                                                TXT[cleLang] = txtTemp[cleLang]
                            else:
                              if patentCont[u'ops:world-patent-data'][u'exchange-documents'][u'exchange-document'].has_key('abstract'):
                                  TXT = ExtractAbstract(patentCont[u'ops:world-patent-data'][u'exchange-documents'][u'exchange-document'][u'abstract'])
                            
                                                
                            for lang in TXT.keys():                            
                                EcritContenu(IRAM + ' *Contenu_Abstract \n' + TXT[lang], RepDir+ '//'+ content+'s//'+lang+'-'+ndb+'.txt')   
                                Langues.add(lang)
                            abstract +=1
    #                        except:
    #                            print "pas glop"
    #                            print patentCont
                        
else:
    print "no gather parameter set. Finishing."

lstfic = os.listdir(ResultPathContent+'//abstracts/')
lang = [fics[0:2] for fics in lstfic]
Langues = set(lang)
print 'Over the ', len(lstBrevet),  ' patents... '
print abstract, " not so empty abstract gathered. See ", ndf.replace('.dump', '')+'/Abstracts/ directory for files'
def _absorb_classification(pat, parsed):
    """Merge one parsed classification record into ``pat`` in place.

    Each key of ``parsed`` is stored in ``pat`` as a list.  A missing entry,
    or one that is not already a list (for instance ``''``), is started as an
    empty list.  List values are appended wholesale; scalar values are added
    only when not already present.  Previously accumulated values are kept.
    """
    for cle in parsed:
        if not isinstance(pat.get(cle), list):
            pat[cle] = []
        valeur = parsed[cle]
        if isinstance(valeur, list):
            pat[cle].extend(valeur)
        elif valeur not in pat[cle]:
            pat[cle].append(valeur)
        if pat[cle].count(',') > 0:
            print(pat[cle])  # hum, strange state


def _join_codes(codes):
    """Return ``codes`` joined with '-' when it is a list, else unchanged."""
    if isinstance(codes, list):
        return '-'.join(codes)
    return codes


def ExtractPatent(pat, ResultContents, BiblioPatents):
    """Normalise one patent record, write its abstracts and file it.

    ``None`` (or missing) values of the required keys get defaults: 'empty'
    for inventor, applicant and title, today's date for ``'date'`` (string)
    and ``'dateDate'`` (``datetime.date``).  Any other ``None`` value becomes
    ``u'empty'``.  The classification is then expanded into per-key lists,
    the abstracts found in ``patentBib`` are written through
    ``EcritContenu``, and the record is merged into (or appended to)
    ``BiblioPatents``.

    Parameters:
        pat: patent dictionary; must contain ``'classification'`` and
            ``'label'``.  It is modified in place.
        ResultContents: not used by this function.
        BiblioPatents: list of already known patent dictionaries; modified
            in place.

    Returns:
        The tuple ``(pat, YetGathered, BiblioPatents)``.

    Relies on module-level names not defined here: ``ndb``, ``patentBib``,
    ``ResultAbstractPath``, ``YetGathered``, ``ExtractClassificationSimple2``,
    ``SeparateCountryField``, ``UnNest``, ``UniClean``, ``coupeEnMots``,
    ``ExtractAbstract``, ``EcritContenu``, ``Update`` and ``CleanPatent``.
    """
    import datetime

    DejaLa = [known['label'] for known in BiblioPatents]

    today = datetime.date.today()
    for cle in ('inventeur', 'applicant', 'titre'):
        if pat.get(cle) is None:
            pat[cle] = 'empty'
    if pat.get('date') is None:
        pat['date'] = str(today.year) + '-' + str(today.month) + '-' + str(today.day)
    if pat.get('dateDate') is None:
        # Must be a date object: its .year attribute is read further down.
        pat['dateDate'] = today
    # Remaining None values are replaced on the patent itself, never on an
    # entry of BiblioPatents.
    for cle in [key for key in pat if pat[key] is None]:
        pat[cle] = u'empty'

    classification = pat['classification']
    if not isinstance(classification, list):
        classification = [classification]
    # Iterate over a copy: the merge may touch pat['classification'] itself.
    for classif in list(classification):
        _absorb_classification(pat, ExtractClassificationSimple2(classif))

    pat = SeparateCountryField(pat)
    for clekey in list(pat.keys()):
        if isinstance(pat[clekey], list):
            pat[clekey] = UnNest(pat[clekey])

    CIB1 = _join_codes(pat.get('IPCR1', ''))
    CIB3 = _join_codes(pat.get('IPCR3', ''))
    CIB4 = _join_codes(pat.get('IPCR4', ''))
    IRAM = ('**** *Label_' + ndb
            + ' *Country_' + pat['pays']
            + ' *CIB3_' + CIB3
            + ' *CIB1_' + CIB1
            + ' *CIB4_' + CIB4
            + ' *Date_' + str(pat['dateDate'].year)
            + ' *Applicant_' + UniClean('-'.join(coupeEnMots(pat['applicant'])))[0:12])
    # Empty fields (a tag immediately followed by a space) are tagged 'empty'.
    IRAM = IRAM.replace('_ ', '_empty') + '\n'

    documents = patentBib[u'ops:world-patent-data'][u'exchange-documents'][u'exchange-document']
    if not isinstance(documents, list):
        documents = [documents]
    TXT = dict()
    for document in documents:
        if 'abstract' in document:
            txtTemp = ExtractAbstract(document['abstract'])
            for cleLang in txtTemp:
                if cleLang in TXT:
                    TXT[cleLang] += txtTemp[cleLang]
                else:
                    TXT[cleLang] = txtTemp[cleLang]
    # Written for both the single-document and the multi-document case.
    for lang in TXT:
        EcritContenu(IRAM + TXT[lang], ResultAbstractPath + '//' + lang + '-' + ndb + '.txt')

    if pat['label'] in DejaLa:  # checking multiples status
        tempor = [patent for patent in BiblioPatents if patent['label'] == pat["label"]][0]  # should be unique
        BiblioPatents.remove(tempor)
        tempor = Update(tempor, pat)
        for key in list(tempor.keys()):
            if isinstance(tempor[key], list):
                tempor[key] = UnNest(tempor[key])
        tempor = CleanPatent(tempor)
        BiblioPatents.append(CleanPatent(tempor))
    else:
        for key in list(pat.keys()):
            if isinstance(pat[key], list):
                pat[key] = UnNest(pat[key])
        pat = CleanPatent(pat)
        BiblioPatents.append(CleanPatent(pat))
        DejaLa.append(pat['label'])
    # NOTE(review): YetGathered is a module-level name, not the local DejaLa
    # list that was just updated -- confirm which one callers expect.
    return pat, YetGathered, BiblioPatents
Exemple #4
0
def _merge_classification(target, extracted):
    """Fold one parsed classification record into ``target`` in place.

    For every key of ``extracted`` the matching entry of ``target`` is kept
    as a list without duplicates.  A missing entry, or one that is not
    already a list (for instance the ``''`` placeholder), is started afresh
    as an empty list; values accumulated from earlier classifications are
    never discarded.
    """
    for key in extracted:
        bucket = target.get(key)
        if not isinstance(bucket, list):
            bucket = target[key] = []
        value = extracted[key]
        for item in (value if isinstance(value, list) else [value]):
            if item not in bucket:
                bucket.append(item)


def _join_codes(codes):
    """Return ``codes`` joined with '-' when it is a list, else unchanged."""
    if isinstance(codes, list):
        return '-'.join(codes)
    return codes


def GetFamilly(client, brev, rep):
    """Collect bibliographic data for every member of a patent's family.

    The family is requested through ``client.family`` first with an Epodoc
    reference built from ``brev['label']`` and, if that fails for any reason,
    with a Docdb reference built from ``brev['label']`` and
    ``brev['portee']``.  For each family member a dictionary is built
    (label, title, inventors, applicants, country, kind, classification
    keys, date, citation count, ...) and the member's abstracts are written
    through ``EcritContenu`` under ``rep + '//FamiliesAbstracts//'``.

    Parameters:
        client: object exposing ``family(...)`` whose result has ``json()``.
        brev: mapping with at least ``'label'`` (and ``'portee'`` for the
            fallback request).
        rep: base directory (string) used to build the abstract file paths.

    Returns:
        A list of member dictionaries, each carrying a ``'prior'`` key, or
        ``None`` when neither request succeeded.  Members for which no label
        can be read, or whose data contains a ``None`` value, are left out.

    Relies on module-level helpers that are not defined in this function:
    ``ExtraitTitleEn``, ``ExtraitParties``, ``ExtraitCountry``,
    ``ExtraitKind``, ``ExtraitIPCR2``, ``ExtractionDate``, ``coupeEnMots``
    and ``EcritContenu``.
    """
    from OPS2NetUtils2 import ExtractClassificationSimple2, SeparateCountryField, ExtractAbstract, UniClean
    import epo_ops
    import datetime

    lstres = []
    famille = u'ops:patent-family'
    membre = u'ops:family-member'

    try:
        reponse = client.family('publication', epo_ops.models.Epodoc(brev['label']), 'biblio')
        dico = reponse.json()[u'ops:world-patent-data'][famille][membre]
    except Exception:
        # NOTE(review): unlike the first request, this fallback does not ask
        # for the 'biblio' constituent -- confirm whether that is intended.
        try:
            reponse = client.family('publication', epo_ops.models.Docdb(brev['label'][2:], brev['label'][0:2], brev['portee']))
            dico = reponse.json()[u'ops:world-patent-data'][famille][membre]
        except Exception:
            # Single-argument print calls behave identically on Python 2 and 3.
            print("nothing found for  " + str(brev))
            print("ignoring")
            return None

    if dico is None:
        return lstres
    if isinstance(dico, dict):
        # A family with a single member is decoded as a dict, not a list.
        dico = [dico]

    lookup_errors = (KeyError, IndexError, TypeError)

    for donnee in dico:
        try:
            label = donnee[u'exchange-document'][u'bibliographic-data'][u'publication-reference'][u'document-id'][1][u'doc-number'][u'$']
        except lookup_errors:
            try:
                label = donnee[u'publication-reference'][u'document-id'][1][u'doc-number']['$']
            except lookup_errors:
                print("no label ?")
                continue

        # The Extrait* helpers are fed the member wrapped in this nesting.
        Req = {
            u'ops:world-patent-data': {
                'ops:biblio-search': {
                    'ops:search-result': {u'exchange-documents': donnee},
                },
            },
        }

        PatentData = dict()
        PatentData[u'label'] = label
        PatentData[u'titre'] = UniClean(ExtraitTitleEn(Req))
        PatentData[u'inventeur'] = UniClean(ExtraitParties(Req, 'inventor', 'epodoc'))
        PatentData[u'applicant'] = UniClean(ExtraitParties(Req, 'applicant', 'epodoc'))
        PatentData[u'pays'] = ExtraitCountry(Req)
        PatentData[u'portee'] = ExtraitKind(Req)

        try:
            classification = ExtraitIPCR2(Req)
        except Exception:
            classification = ''
        if isinstance(classification, list):
            for classif in classification:
                _merge_classification(PatentData, ExtractClassificationSimple2(classif))
        elif classification != '':
            _merge_classification(PatentData, ExtractClassificationSimple2(classification))
        # The raw classification is never part of the result.
        PatentData.pop(u'classification', None)

        PatentData = SeparateCountryField(PatentData)

        date = ExtractionDate(Req)  # priority claim first date time
        if date is not None and date != "":
            PatentData[u'date'] = date[0:4] + '-' + date[4:6] + '-' + date[6:]
            PatentData[u'dateDate'] = datetime.date(int(date[0:4]), int(date[4:6]), int(date[6:]))
        else:
            today = datetime.date.today()
            PatentData[u'dateDate'] = today
            PatentData[u'date'] = str(today.year) + '-' + str(today.month) + '-' + str(today.day)

        # Always record a citation count, also when 'references-cited' is
        # present without any 'citation' entry.
        try:
            citation = donnee[u'exchange-document'][u'bibliographic-data'][u'references-cited'][u'citation']
        except lookup_errors:
            PatentData[u'citations'] = 0
        else:
            # Presumably a lone citation is decoded as a dict, like a lone
            # family member above; count it as one instead of counting keys.
            PatentData[u'citations'] = 1 if isinstance(citation, dict) else len(citation)

        # NOTE(review): the two flags below are deliberately left unset when
        # the indicator is readable but different from 'YES'; the 'prior'
        # selection at the end keys off the presence of 'representative'.
        try:
            if donnee[u'priority-claim'][u'priority-active-indicator']['$'] == u'YES':
                PatentData['priority-active-indicator'] = 1
        except lookup_errors:
            PatentData['priority-active-indicator'] = 0
        try:
            if donnee[u'application-reference'][u'@is-representative'] == u'YES':
                PatentData['representative'] = 1
        except lookup_errors:
            PatentData[u'representative'] = 0

        PatentData[u'family lenght'] = len(dico)

        if None in PatentData.values():
            # Missing values: this member is left out of the result.
            continue

        IRAM = ('**** *Label_' + PatentData[u'label']
                + ' *Country_' + PatentData[u'pays']
                + ' *CIB3_' + _join_codes(PatentData.get(u'IPCR3', ''))
                + ' *CIB1_' + _join_codes(PatentData.get(u'IPCR1', ''))
                + ' *CIB4_' + _join_codes(PatentData.get(u'IPCR4', ''))
                + ' *Date_' + str(PatentData[u'dateDate'].year)
                + ' *Applicant_' + '-'.join(coupeEnMots(str(PatentData[u'applicant']))))

        # Members located through the second label path may carry no
        # 'exchange-document' at all; they simply have no abstract.
        documents = donnee.get(u'exchange-document', [])
        if not isinstance(documents, list):
            documents = [documents]
        TXT = dict()
        for document in documents:
            if 'abstract' in document:
                txtTemp = ExtractAbstract(document['abstract'])
                for cleLang in txtTemp:
                    if cleLang in TXT:
                        TXT[cleLang] += txtTemp[cleLang]
                    else:
                        TXT[cleLang] = txtTemp[cleLang]
        for lang in TXT:
            EcritContenu(IRAM + ' *Contenu_Abstract \n' + TXT[lang],
                         rep + '//FamiliesAbstracts//' + lang + '-' + PatentData['label'] + '.txt')

        lstres.append(PatentData)

    # 'prior' is the label of the earliest-dated member carrying a
    # 'representative' key, defaulting to the requested patent itself.
    prior = brev['label']
    datemin = datetime.date(3000, 1, 1)
    for brevet in lstres:
        if 'representative' in brevet and brevet['dateDate'] < datemin:
            datemin = brevet['dateDate']
            prior = brevet['label']
    for brevet in lstres:
        brevet['prior'] = prior
    return lstres