def run():
    """Generate maps of applicants' and inventors' origin countries."""
    # Bring logging up before doing anything else.
    boot_logging()

    config = LoadConfig()

    # Nothing to do unless the cartography export step is enabled.
    if not config.FormateExportCountryCartography:
        return

    storage_path = config.ResultBiblioPath
    output_path = config.ResultPath

    # One pass over the plain dataset, plus one over the family dataset
    # when families were gathered.
    variants = [""]
    if config.GatherFamilly:
        variants.append("Families")

    for variant in variants:
        logger.info(
            "Generating maps about applicants' and inventors' origin countries for {}. "
            .format(label_from_prefix(variant)))
        # Storage slot is the prefix plus the data directory name,
        # e.g. "<ndf>" vs. "Families<ndf>".
        generate_map(storage_path, variant + config.ndf, output_path)

    # Due to limit of D3, countries resources are necessary placed
    # in same working directory... other solution is to start an http server
    # http://stackoverflow.com/questions/17077931/d3-samples-in-a-microsoft-stack
    # Clone required resources into result directory.
    shutil.copy('countries.json', os.path.join(output_path, "countries.json"))
def main():
    """Run the split / process / fusion pipeline, then launch p2n on the result."""
    cfg = LoadConfig()
    target_path = get_target_path()

    # Clear any state left over from a previous run.
    ProcessList(cfg.ndf).reset()
    FusionList(cfg.ndf).reset()

    # The three pipeline stages run sequentially, each in its own interpreter.
    for script in ('run_spliter.py', 'process_list.py', 'fusion_patents.py'):
        stage = Popen(['python', 'Patent2Net/scripts/' + script, target_path])
        stage.wait()

    # The fusion stage writes "<target>_fusion.cql" one directory up;
    # hand that file to p2n (fire and forget — no wait on this one).
    destination = os.path.join("..", target_path.replace(".cql", "") + "_fusion.cql")
    Popen(['p2n', 'run', "--config=" + destination])
def get_one_request(p2n_dir):
    """Assemble the full status payload for a single request directory."""
    dex = get_current_dex()
    cql_config = LoadConfig(p2n_dir + ".cql")

    # The gather options are echoed back so the client can show the request setup.
    options = {
        "GatherContent": cql_config.GatherContent,
        "GatherBiblio": cql_config.GatherBiblio,
        "GatherPatent": cql_config.GatherPatent,
        "GatherFamilly": cql_config.GatherFamilly,
    }
    payload = {
        "done": p2n_dir in dex["done"],
        "state": get_state(p2n_dir),
        "data": get_directory_request_data_all(p2n_dir),
        "progress": get_data_progress(p2n_dir),
        "directory": p2n_dir,
        "cql": {
            "requete": cql_config.requete,
            "ndf": cql_config.ndf,
            "options": options,
        },
    }
    return get_success_response("", payload)
def run():
    """Filter equivalent patents out of a dataset gathered by P2N-OPSGather.

    For each biblio file (plain and, when families were gathered, the
    "Families" variant) keep a single representative per group of equivalent
    patents — preferring the oldest record and, on ties, the record carrying
    the most information (longest serialized content) — then rewrite the
    biblio pickle file, keeping the original as "Old<name>".

    NOTE(review): this body was reconstructed from whitespace-mangled source;
    the nesting of a few deeply nested branches is a best-effort reading and
    should be confirmed against upstream Patent2Net.
    """
    aujourd = datetime.date.today()  # unused below

    # Pull everything from the configuration (several values are unused here).
    configFile = LoadConfig()
    requete = configFile.requete  # unused below
    ndf = configFile.ndf
    Gather = configFile.GatherContent  # unused below
    GatherBiblio = configFile.GatherBiblio  # unused below
    GatherPatent = configFile.GatherPatent  # unused below
    GatherFamilly = configFile.GatherFamilly
    IsEnableScript = configFile.FormateExportDataTable  # unused below
    # should set a working dir one upon a time... done it is temporPath
    ListBiblioPath = configFile.ResultBiblioPath
    temporPath = configFile.temporPath  # unused below
    ResultPathContent = configFile.ResultPath  # unused below
    ResultListPath = configFile.ResultListPath  # unused below
    ResultBiblioPath = configFile.ResultBiblioPath

    # Reference accumulators (none of these are used in this step).
    lstApplic = []
    Inventeurs = []
    Applicants = []
    nbAppliAvant = dict()
    nbInvAvant = dict()

    # Process the plain file and, when families were gathered, the family file.
    if GatherFamilly:
        PU = [ndf, 'Families' + ndf]
    else:
        PU = [ndf]

    for fic in PU:
        # Skip files already processed (an "Old<fic>" backup marks completion).
        if 'Old' + fic not in os.listdir(ResultBiblioPath):
            cptInv, cptAppl = 0, 0  # unused below
            print(
                "\n> Hi! This is Pre Process for filtering equivalents patents from dataset gathered by P2N-OPSGather: used on:",
                fic)
            if 'Description' + fic in os.listdir(ListBiblioPath):
                with open(ListBiblioPath + '//' + fic, 'r',
                          encoding="utf8") as data:
                    dico = LoadBiblioFile(ListBiblioPath, fic)
            else:  # Retrocompatibility
                print("please use Comptatibilizer")
                sys.exit()
            LstBrevet = dico['brevets']

            # --- patent filtering process ----------------------------------
            Filtres = []   # selected representatives
            dejaVus = []   # labels already handled
            LabBrevets = [brev['label'] for brev in LstBrevet]
            for bre in LstBrevet:  # walk the patent list
                if bre['label'] not in dejaVus:  # not seen yet
                    dejaVus.append(bre['label'])  # mark as seen
                    # Walk the equivalents. Two cases: a list or a non-empty string.
                    if isinstance(bre['equivalents'],
                                  list) and len(bre['equivalents']) > 0:
                        # Collect the date of every equivalent present in the corpus.
                        dates = []
                        for brev in bre['equivalents']:
                            # Only if it belongs to the gathered set (otherwise ignored).
                            if brev in LabBrevets:
                                for brevet in LstBrevet:  # look it up
                                    if brevet['label'] == brev:  # yes, that's the one !!!
                                        if isinstance(brevet["date"], list):
                                            # dates are sometimes a list OR a string :-()
                                            date = min(brevet["date"])
                                        else:
                                            date = brevet["date"]
                                        # Record into the ad-hoc structure:
                                        # date, patent, size (character count).
                                        if len(date) < 4:
                                            print("Aille")
                                        dates.append(
                                            (date, bre, len(str(bre.values()))))
                                        #dates.extend((brevet["date"][0], brevet, len(str(brevet.values()))) for brevet in LstBrevet if brevet['label'] == brev )
                                dejaVus.append(brev)
                        if len(dates) == 1:  # no ambiguity
                            Filtres.append(dates[0][1])
                        elif len(dates) > 1:  # take the oldest
                            MiniDate = min([dat for dat, brev, val in dates])
                            MaxVal = max(val for dat, brev, val in dates)
                            if len(MiniDate) < 5:
                                print(bre['prior-Date'], ' -- > ', MiniDate)
                            # Give priority to the earliest date while maximizing
                            # length (supposed to be fields with information).
                            candidat = [
                                bre for dat, bre, val in dates
                                if dat == MiniDate and val == MaxVal
                            ]
                            if len(candidat) == 0:
                                # If that fails, prioritize max information content.
                                candidat = [
                                    brev for dat, brev, val in dates
                                    if val == MaxVal
                                ]
                                if len(candidat) > 1:
                                    # Tie-break on the earliest priority date.
                                    priorDateMin = min([
                                        min(brevet["prior-Date"])
                                        for brevet in candidat
                                    ])
                                    NewCandidat = [
                                        brev for brev in candidat
                                        if priorDateMin in brev["prior-Date"]
                                    ]
                                    if len(NewCandidat) > 1:
                                        # NOTE(review): keeps only the first match;
                                        # when exactly one matches, a one-element
                                        # list (not a dict) is appended as-is.
                                        NewCandidat = NewCandidat[0]
                                    Filtres.append(NewCandidat)
                                else:
                                    print("pffff")
                            else:  # none of the equivalents in the list
                                Filtres.append(candidat[0])
                        else:  # no equivalent found in corpus: keep the patent itself
                            Filtres.append(bre)
                    elif isinstance(bre['equivalents'],
                                    str) and len(bre['equivalents']) > 0:
                        #len(bre ['equivalents'])>0 and bre ['equivalents'] in LabBrevets:
                        if bre['equivalents'] in LabBrevets:
                            brevet = [
                                brev for brev in LstBrevet
                                if brev['label'] == bre['equivalents']
                            ][0]
                            if isinstance(brevet["date"], list):
                                # dates are sometimes a list OR a string :-()
                                date = min(brevet["date"])
                            else:
                                date = brevet["date"]
                            if len(date) < 4:
                                print("Aille")
                            dates = [(date, brevet, len(brevet.values()))]
                            if isinstance(bre["date"], list):
                                # dates are sometimes a list OR a string :-()
                                date = min(bre["date"])
                            else:
                                date = bre["date"]
                            # Join in the current patent.
                            dates.append((date, bre, len(bre.values())))
                            MiniDate = min([dat for dat, bre, val in dates])
                            MaxVal = max([val for dat, bre, val in dates])
                            candidat = [
                                bre for dat, bre, val in dates
                                if dat == MiniDate and val == MaxVal
                            ]
                            if len(candidat) > 1:
                                pass
                            elif len(candidat) == 0:
                                # If that fails, prioritize max information content.
                                candidat = [
                                    brev for dat, brev, val in dates
                                    if val == MaxVal
                                ]
                                if len(candidat) > 1:
                                    priorDateMin = min([
                                        min(brevet["prior-Date"])
                                        for brevet in candidat
                                    ])
                                    NewCandidat = [
                                        brev for brev in candidat
                                        if priorDateMin in brev["prior-Date"]
                                    ]
                                    if len(NewCandidat) > 1:
                                        NewCandidat = NewCandidat[0]
                                    Filtres.append(NewCandidat)
                                else:
                                    print("pffff")
                            else:
                                Filtres.append(candidat[0])
                        else:  # equivalent not in the corpus
                            Filtres.append(bre)
                    else:  # no usable equivalents at all
                        Filtres.append(bre)
                    # Mark every patent referenced in the last collected "dates"
                    # as seen. NOTE(review): "dates" may stem from a previous
                    # iteration when the branches above did not (re)build it.
                    for dat, brevet, val in dates:
                        dejaVus.append(brevet['label'])

            # --- joining lost patents --------------------------------------
            # Labels of the retained representatives.
            LabFiltered = []
            for bre in Filtres:
                if isinstance(bre["label"], str):
                    LabFiltered.append(bre['label'])
                else:
                    LabFiltered.append(bre['label'][0])
            # Every equivalent label covered by the retained patents.
            EquivFiltered = []
            for bre in Filtres:
                for pat in bre['equivalents']:
                    EquivFiltered.append(pat)
            # Patents neither retained nor covered by any retained equivalent.
            complement = [bre for bre in LstBrevet \
                if bre['label'] not in LabFiltered \
                and sum([eq in EquivFiltered for eq in bre["equivalents"]]) == 0]

            # Second pass: deduplicate representatives whose equivalents overlap.
            # NOTE(review): NewFilt is filled here but never persisted below;
            # only Resultat (third pass) is written to disk.
            NewFilt = []
            DejaVus = []
            for bre in Filtres:
                if isinstance(bre['label'], list):
                    bre['label'] = bre['label'][0]
                if bre['label'] not in DejaVus:
                    # Equivalent labels claimed by all *other* representatives.
                    equi = []
                    cpFilt = copy.copy(Filtres)
                    cpFilt.remove(bre)
                    for bre1 in cpFilt:
                        if isinstance(bre1['equivalents'], list):
                            for eq in bre1['equivalents']:
                                if len(eq) > 0 and eq != 'empty':
                                    equi.append(eq)
                        elif len(bre1['equivalents']
                                 ) > 1 and bre1['equivalents'] != 'empty':
                            equi.append(bre1['equivalents'])
                        else:
                            pass
                    if len(bre['equivalents']
                           ) > 0 and bre['equivalents'] != 'empty':
                        # Overlap between this patent (and its label) and the
                        # other representatives' equivalents.
                        res = sum([pat in equi for pat in bre['equivalents']] +
                                  [bre['label'] in equi])
                        if res > 0:
                            tempo = [
                                (bre2['date'], bre2, len(bre2.values()))
                                for bre2 in cpFilt if bre['equivalents']
                            ]
                            # we could go straight here and test tempo's size :-/
                            # Normalize: explode list-valued dates, one entry per date.
                            tempo2 = []
                            for dat, brevet, val in tempo:
                                if isinstance(dat, str):
                                    tempo2.append((dat, brevet, val))
                                elif isinstance(dat, list):
                                    for truc in dat:
                                        if len(truc) > 0:
                                            tempo2.append((truc, brevet, val))
                                else:
                                    pass
                            tempo = tempo2
                            dates = []
                            valeurs = []
                            for dat, brevet, val in tempo:
                                dates.append(dat)
                                valeurs.append(val)
                            miniDate = min(dates)
                            maxVal = max(valeurs)
                            # Prefer oldest AND most informative...
                            tempo2 = [
                                bre for dat, brevet, val in tempo
                                if dat == miniDate and val == maxVal
                            ]
                            if len(tempo2) > 0:
                                NewFilt.append(tempo2[0])
                                if isinstance(tempo2[0]['equivalents'], list):
                                    for eq in tempo2[0]['equivalents']:
                                        DejaVus.append(eq)
                                elif tempo2[0]['equivalents'] != 'empty':
                                    DejaVus.append(tempo2[0]['equivalents'])
                            else:
                                # ...otherwise fall back to most informative only.
                                tempo2 = [
                                    bre for dat, brevet, val in tempo
                                    if val == maxVal
                                ]
                                if len(tempo2) > 0:
                                    NewFilt.append(tempo2[0])
                                    if isinstance(tempo2[0]['equivalents'],
                                                  list):
                                        for eq in tempo2[0]['equivalents']:
                                            DejaVus.append(eq)
                                    elif tempo2[0]['equivalents'] != 'empty':
                                        DejaVus.append(
                                            tempo2[0]['equivalents'])
                                else:
                                    pass
                        else:
                            NewFilt.append(bre)
                    else:
                        NewFilt.append(bre)
                    # Remember this representative's label(s) and equivalents.
                    if isinstance(bre['label'], str):
                        DejaVus.append(bre['label'])
                    else:
                        for lab in bre['label']:
                            DejaVus.append(lab)
                    if isinstance(bre['equivalents'], list):
                        for eq in bre['equivalents']:
                            DejaVus.append(eq)
                    elif bre['equivalents'] != 'empty':
                        DejaVus.append(bre['equivalents'])
                    else:
                        pass

            # Third pass: drop representatives whose equivalents were already
            # claimed by an earlier representative.
            EquivFiltered = []
            cpFilt = copy.copy(Filtres)
            for bre in Filtres:
                # Normalize labels to plain strings.
                if not isinstance(bre['label'], str):
                    if len(bre['label']) > 0:
                        bre['label'] = bre['label'][0]
                    else:
                        print("no label !!!!")
                else:
                    pass
            toRemove = []
            for bre in Filtres:
                # Normalize equivalents to a list.
                if not isinstance(bre['equivalents'], list):
                    if len(bre['equivalents']
                           ) and bre['equivalents'] != 'empty':
                        bre['equivalents'] = [bre['equivalents']]
                    else:
                        bre['equivalents'] = []
                for pat in bre['equivalents']:
                    if pat != bre['label']:
                        if pat not in EquivFiltered:
                            EquivFiltered.append(pat)
                        else:
                            # Equivalent already claimed: schedule removal.
                            cpFilt = [
                                brev for brev in cpFilt
                                if brev['label'] != bre['label']
                            ]
                            toRemove.append((bre['label'], bre))
            exclude = [truc for truc, muche in toRemove]
            Resultat = []
            for bre in Filtres:
                if bre['label'] not in exclude:
                    Resultat.append(bre)
            EquivFiltered2 = []
            for bre in Resultat:
                for pat in bre['equivalents']:
                    EquivFiltered2.append(pat)
            print("net set of equivalent covered: ", len(EquivFiltered2))
            print(len(LstBrevet), ' --> ', len(Filtres), ' --> ',
                  len(Resultat))
            print("Good, ", len(Resultat + complement),
                  " patents filterd from equivalent unicity exrtracted from ",
                  fic)

            # Saving file: append each retained patent to a temp pickle, then
            # swap it in, keeping the original as "Old<fic>".
            for brev in Resultat:
                with open(ResultBiblioPath + '//tempo' + fic, 'ab') as ficRes:
                    pickle.dump(brev, ficRes)
            os.rename(ResultBiblioPath + '//' + fic,
                      ResultBiblioPath + '//Old' + fic)
            os.rename(ResultBiblioPath + '//tempo' + fic,
                      ResultBiblioPath + '//' + fic)
# -*- coding: utf-8 -*- """ Created on Wed May 24 08:00:33 2017 This script load the xml IPCR descriptions text from Wipo (ipcr-2015.xml) and a patent universe from P2N (a list of patent according to a request). It develops "Augmented Abstracts" consisting of each abstracts completed with the sum of the first deepers classifications descriptions text (up to the section level) found in the patent metadata @author: dreymond """ from lxml import etree from Patent2Net.P2N_Lib import LoadBiblioFile, symbole from Patent2Net.P2N_Config import LoadConfig import sys, os, codecs configFile = LoadConfig() requete = configFile.requete ndf = configFile.ndf #should set a working dir one upon a time... done it is temporPath ResultPath = configFile.ResultBiblioPath temporPath = configFile.temporPath ResultContentsPath = configFile.ResultContentsPath ResultBiblioPath = configFile.ResultBiblioPath ResultPathContent = '..//DATA//'+ndf+'//PatentContents' #Setting wether or not we use only primary classification Primar = True #Setting cache for performance purposes CIB = dict() if 'Description'+ndf or 'Description'+ndf.lower() in os.listdir(ResultBiblioPath): # NEW 12/12/15 new gatherer append data to pickle file in order to consume less memory
def run():
    """Generate a gallery of patent drawings (thumbnails + HTML index).

    Reads the bibliographic pickle for the plain dataset and, when families
    were gathered, the "Families" dataset; for every patent it walks the
    numbered TIFF drawings already on disk, builds thumbnails, and renders
    one index page per dataset variant.
    """
    # Bootstrap logging
    boot_logging()

    # Load configuration
    config = LoadConfig()

    # Run this only if enabled
    if not config.GatherImages:
        return

    # Get some information from configuration
    expression = config.requete
    storage_basedir = config.ResultBiblioPath
    storage_dirname = config.ndf
    output_path = config.ResultPathImages

    # Compute prefixes
    prefixes = [""]
    if config.GatherFamilly:
        prefixes.append("Families")

    # Build galleries for all prefixes
    for prefix in prefixes:
        # Status message
        label = label_from_prefix(prefix)
        logger.info("Generating gallery of drawings for {}. ".format(label))

        # Compute storage slot using prefix and DataDirectory
        # e.g. "Lentille" vs. "FamiliesLentille"
        storage_name = prefix + storage_dirname

        # Load bibliographic data
        biblio_file = LoadBiblioFile(storage_basedir, storage_name)

        # Generate thumbnails
        gallery = []
        patents = biblio_file['brevets']
        cpt = 0
        for patent in patents:
            # BUG FIX: was "cpt + 1" (a no-op expression), so the progress
            # indicator below never advanced past its starting value.
            cpt += 1
            AnnonceProgres(Appli='p2n_image', valMax=100,
                           valActu=90 + cpt * 10 / len(patents))
            patent_label = get_patent_label(patent)
            i = 1
            logger.info('Processing patent {}'.format(patent_label))
            path_img_base = '{}//{}-{}.tiff'.format(output_path, patent_label,
                                                    '{}')
            path = path_img_base.format(i)
            # A patent may have several numbered drawing files; walk the
            # sequence until the next index is missing on disk.
            while os.path.exists(path):
                thumb, orig, tiff = generate_thumbnails(path)
                gallery.append({
                    "_id": '{}-{}'.format(patent_label, i),
                    'thumb': thumb,
                    'orig': orig,
                    'label': patent['title'],
                    'ipcr7': patent['IPCR7'],
                    'code': patent_label,
                    'tiff': tiff,
                })
                i += 1
                path = path_img_base.format(i)

        # Render gallery
        AnnonceProgres(Appli='p2n_image', valMax=100, valActu=100)
        RenderTemplate(
            'ModeleImages.html',
            output_path + '/index' + prefix + '.html',
            request=expression.replace('"', ''),
            gallery=gallery,
            json=json.dumps(gallery),
        )
def main():
    """Split a too-large OPS request into per-year/month/day/IPC CQL files.

    The EPO OPS service caps results at 2000 patents per request. Starting
    from the configured request, this probes each year with checkRequest();
    a year under the cap yields one .cql file, a year over the cap is split
    into months, then days, then IPC sections as a last resort. Progress is
    recorded through the add_spliter_result/set_spliter_* helpers.

    NOTE(review): this body was reconstructed from whitespace-mangled source;
    the nesting of the deeper branches is a best-effort reading and should be
    confirmed against upstream Patent2Net.
    """
    configFile = LoadConfig()
    RequestOrig = configFile.requete
    directory = configFile.ndf
    today = datetime.datetime.today()
    read_dex()
    to_be_found = get_data_to_be_found(directory)
    if to_be_found == None:
        print("Vous devez d'abord verifier si la requete doit être découpée")
        return None
    need_spliter = to_be_found["need_spliter"]
    # Files already produced by a previous run; skipped below to allow resume.
    lstFicOk = to_be_found["lstFicOk"]
    if need_spliter != True:
        print("Cette requete n'a pas besoin d'être découpée")
        return None
    dateDeb = get_data_spliter_start_date(directory)
    if dateDeb == None:
        # Fall back to 1900 when no start date was configured.
        dateDeb = 1900
        #print("Vous devez préciser la date de début pour découper la requete")
        # return None
    targetDirectory = REQUEST_AUTO_FOLDER + directory
    if not os.path.exists(targetDirectory):
        os.makedirs(targetDirectory)
    # "date" is a placeholder substituted per year/month/day below.
    Request = RequestOrig + ' AND PD=date'
    DataDir = directory + '_segments_'
    delete_data_spliter(directory)
    set_spliter_result_start(directory)
    # NOTE(review): moisOk is initialized but only "monthOk" is assigned
    # later — moisOk is effectively dead; none of these flags are read below.
    jourOk, moisOk, ipcOk = False, False, False
    Total = 0
    nbFiles = 0
    # Template .cql file; placeholders ***requete*** / ***dataDir*** are
    # substituted for every generated segment.
    fic = open("Patent2Net/REQUESTS/requestModel.cql", 'r')  #requestModel.cql
    DataReq = targetDirectory
    data = fic.read()
    fic.close()
    print("Start for")
    for AN in range(dateDeb, today.year + 1, 1):
        print(AN)
        # Probe the whole year first.
        Trouves = checkRequest(Request.replace('=date', '=' + str(AN)))
        if 2000 > Trouves > 0:
            Total += Trouves
            # a request for that year is ok
            monthOk = False
            ipcOk = False
            Request2 = Request.replace('=date', '=' + str(AN))
            data2 = data.replace("***requete***", Request2)
            data2 = data2.replace("***dataDir***", DataDir + str(AN))
            NameFic = str(AN) + 'Request.cql'
            # NOTE(review): the file is opened (truncated) before the
            # lstFicOk resume check, unlike the month/day branches below.
            with open(DataReq + "/" + NameFic, "w") as ficRes:  #+"-"+ipc
                print(ficRes.name.split('/')[1])
                if ficRes.name.split('/')[1] not in lstFicOk:
                    ficRes.write(data2)
                    nbFiles += 1
                    print(ficRes.name, 'file written, ', Trouves,
                          ' patents expected and ', Total, ' cumulative.')
                    add_spliter_result(directory, ficRes.name, str(AN),
                                       Trouves)
                    set_spliter_cumulative(directory, Total)
        if Trouves == 0:
            monthOk = False
            ipcOk = False
            jourOk = False
            #nothing to do
        if Trouves >= 2000:
            # we have to split by monthes
            monthOk = True
            jourOk = False
            cpt = 0  #used as monthes
            for month in Months.keys():
                cpt += 1
                if len(str(cpt)) < 2:
                    # monthes are numbered thanks to cpt (ugly isn't it ?)
                    mois = '0' + str(cpt)
                else:
                    mois = str(cpt)
                Request2 = Request.replace('=date', '=' + str(AN) + mois)
                Trouves = checkRequest(Request2)
                if 2000 > Trouves > 0:
                    Total += Trouves
                    # OK less than 2000 and more than 0 go ahead for that request
                    ipcOk = False
                    jourOk = False
                    data2 = data.replace("***requete***", Request2)
                    data2 = data2.replace("***dataDir***",
                                          DataDir + str(AN) + mois)
                    NameFic = str(AN) + mois + 'Request.cql'
                    if NameFic not in lstFicOk:
                        with open(DataReq + "/" + NameFic,
                                  "w") as ficRes:  #+"-"+ipc
                            ficRes.write(data2)
                            nbFiles += 1
                            print(ficRes.name, 'file written, ', Trouves,
                                  ' patents expected and ', Total,
                                  ' cumulative.')
                            add_spliter_result(directory, ficRes.name,
                                               str(AN), Trouves)
                            set_spliter_cumulative(directory, Total)
                if Trouves == 0:
                    ipcOk = False
                    jourOk = False
                    #nothing to do
                if Trouves >= 2000:
                    monthOk = True
                    jourOk = True
                    ipcOk = False
                    # spliting days for that month
                    for day in range(1, Months[month] + 1):
                        if len(str(day)) < 2:
                            jour = '0' + str(day)
                        else:
                            jour = str(day)
                        Request2 = Request.replace(
                            '=date', '=' + str(AN) + mois + jour)
                        Trouves = checkRequest(Request2)
                        if 2000 > Trouves > 0:
                            Total += Trouves
                            # go ahead for that day
                            ipcOk = False
                            data2 = data.replace("***requete***", Request2)
                            data2 = data2.replace(
                                "***dataDir***",
                                DataDir + str(AN) + mois + jour)
                            NameFic = str(AN) + mois + jour + 'Request.cql'
                            if NameFic not in lstFicOk:
                                with open(DataReq + "/" + NameFic,
                                          "w") as ficRes:  #+"-"+ipc
                                    ficRes.write(data2)
                                    nbFiles += 1
                                    print(ficRes.name, 'file written, ',
                                          Trouves, ' patents expected and ',
                                          Total, ' cumulative.')
                                    add_spliter_result(
                                        directory, ficRes.name, str(AN),
                                        Trouves)
                                    set_spliter_cumulative(directory, Total)
                        if Trouves == 0:
                            ipcOk = False
                            jourOk = False
                            #nothing to do
                        if Trouves >= 2000:
                            monthOk = True
                            jourOk = True
                            # bad days for EPO... we need to split again
                            # last solution IPC splitting
                            # for that day only
                            for ipc in IPC:
                                Request3 = Request2 + " AND IC=" + ipc
                                Trouves = checkRequest(Request3)
                                if Trouves > 2000:
                                    print(
                                        "thats bad... the request : " +
                                        Request3 +
                                        " should be splitted and the limits of this script are reached"
                                    )
                                    break
                                Total += Trouves
                                data2 = data.replace("***requete***",
                                                     Request3)
                                data2 = data2.replace(
                                    "***dataDir***",
                                    DataDir + str(AN) + mois + jour + ipc)
                                # NOTE(review): resume check still uses the
                                # day-level NameFic, while the file written
                                # carries the IPC-specific name — likely an
                                # upstream inconsistency, preserved here.
                                if NameFic not in lstFicOk:
                                    with open(
                                            DataReq + "/" + str(AN) + mois +
                                            '-' + jour + '-' + ipc +
                                            'Request.cql',
                                            "w") as ficRes:  #+"-"+ipc
                                        ficRes.write(data2)
                                        nbFiles += 1
                                        print(ficRes.name, 'file written, ',
                                              Trouves,
                                              ' patents expected and ',
                                              Total, ' cumulative.')
                                        add_spliter_result(
                                            directory, ficRes.name, str(AN),
                                            Trouves)
                                        set_spliter_cumulative(
                                            directory, Total)
    set_spliter_result_end(directory)
    print("[request_spliter] request splitted in ", nbFiles, " files")
    print(
        "[request_spliter] Gathering with P2N all this request should lead to ",
        Total, " patents")