"Inventor-Country", "Applicant-Country", "equivalents", "CPC", u'references', u'CitedBy', 'prior', 'family lenght', 'CitO', 'CitP' ] print "\n> Hi! This is DataTable Families formater", ndf if 'Description' + ndf in os.listdir(ListBiblioPath): with open(ListBiblioPath + '//' + ndf, 'r') as data: dico = LoadBiblioFile(ListBiblioPath, ndf) else: #Retrocompatibility print "please use Comptatibilizer" sys.exit() LstBrevet = dico['brevets'] if dico.has_key('requete'): requete = dico["requete"] print "Using ", ndf, " file. Found ", len( dico["brevets"]), " patents! Formating to HMTL tables" LstExp = [] LstExp2 = [] #just for testing last fnction in gathered should deseapear soon for brev in LstBrevet: #brev = CleanPatent(brev)
if IsEnableScript:
    # the list of keys for filtering for datatable
    clesRef = ['label', 'title', 'year','priority-active-indicator',
               'IPCR11', 'kind', 'applicant', 'country', 'inventor',
               'representative', 'IPCR4', 'IPCR7', "Inventor-Country",
               "Applicant-Country", "equivalents", "CPC", u'references',
               u'Citations', u'CitedBy']
    # Process the plain corpus, then (optionally) the families corpus.
    prefixes = [""]
    if GatherFamilly:
        prefixes.append("Families")
    for prefix in prefixes:
        ndf = prefix + configFile.ndf
        if 'Description'+ndf in os.listdir(ListBiblioPath):
            # NEW 12/12/15 new gatherer append data to pickle file in order to consume less memory
            LstBrevet = LoadBiblioFile(ListBiblioPath, ndf)
            with open(ListBiblioPath +'//Description'+ndf, 'r') as ficRes:
                DataBrevet = cPickle.load(ficRes)
        else: #Retrocompatibility
            # Old single-pickle layout: the whole structure sits in one file.
            with open(ListBiblioPath+'//'+ndf, 'r') as data:
                LstBrevet = cPickle.load(data)
        ##next may need clarifying update
        # NOTE(review): both load paths appear to yield a dict whose
        # 'brevets' key holds the patent list; this unwrap is assumed to
        # apply to both branches -- confirm the original nesting.
        data = LstBrevet
        LstBrevet = data['brevets']
        if data.has_key('requete'):
            requete = data["requete"]
        if data.has_key('number'):
            print "Found ", data["number"], " patents! Formating to HMTL tables"
return dico if IsEnableScript: GatherContent = True #not fun registered_client = epo_ops.Client(key, secret) # data = registered_client.family('publication', , 'biblio') registered_client.accept_type = 'application/json' for ndf in [fic2 for fic2 in os.listdir(ResultBiblioPath) if fic2.count('Description')==0]: if ndf.startswith('Families'): typeSrc = 'Families' else: typeSrc = '' if 'Description'+ndf or 'Description'+ndf.lower() in os.listdir(ResultListPath): # NEW 12/12/15 new gatherer append data to pickle file in order to consume less memory ficBrevet = LoadBiblioFile(ResultListPath, ndf) else: #Retrocompatibility print 'gather your data again. sorry' sys.exit() if ficBrevet.has_key('brevets'): lstBrevet = ficBrevet['brevets'] # if data.has_key('requete'): # DataBrevet['requete'] = data["requete"] print "Found ",typeSrc, ' file and', len(lstBrevet), " patents! Gathering contents" else: print 'gather your data again' sys.exit() registered_client = epo_ops.Client(key, secret)
return dico else: return dico if GatherFamilly: print "\n> Hi! This is the family gatherer. Processing ", ndf try: fic = open(ResultPath + '//' + ndf, 'r') print "loading data file ", ndf + ' from ', ResultPath, " directory." if 'Description' + ndf or "Description" + ndf.title() in os.listdir( ResultPath ): # NEW 12/12/15 new gatherer append data to pickle file in order to consume less memory data = LoadBiblioFile(ResultPath, ndf) else: #Retrocompatibility :-) print "gather your data again" sys.exit() if isinstance(data, collections.Mapping): ListeBrevet = data['brevets'] if data.has_key('number'): print "Found ", data["number"], " patents! and ", len( ListeBrevet), " gathered." else: print 'data corrupted. Do something (destroying data directory is a nice idea)' sys.exit() print len(ListeBrevet), " patents loaded from file." print "Augmenting list with families." ficOk = True
NeededInfo.extend(mixNet) # list of needed field for building the net # may be should use from # from collections import OrderedDict # class OrderedNodeGraph(nx.Graph): # node_dict_factory=OrderedDict # G = OrderedNodeGraph() G1 = nx.DiGraph() # dynamic network for Gephi attr_dict = dict() # attributes for the net # flat net for gexf.js may be it is possible to use previous instead of this one... if 'Description' + ndf in os.listdir( BiblioPath ): # NEW 12/12/15 new gatherer append data to pickle file in order to consume less memory print network, ": loading data with ", " and ".join( mixNet), " fields." DataBrevet = LoadBiblioFile(BiblioPath, ndf) print "Hi this is Pre-Network processor. Bibliographic data of ", ndf, " patent universe found." else: #Retrocompatibility print "please use Comptatibilizer" print "Nice, ", len( DataBrevet["brevets"] ), " patents found. Pre-formating ", sys.argv[1], " net." for brev in DataBrevet["brevets"]: #tempo = pickle.load(fic) # we only memorize needed nfo pat = OrderedDict() if "date" not in brev.keys(): brev['date'] = '1-1-1' if isinstance(brev['label'], list): brev['label'] = brev['label'][0] for key in NeededInfo:
if len(lstBrevets) == nbTrouves and nbActus == nbTrouves: ficOk = True print nbTrouves, " patents gathered yet. No more patents to retreive. Steping to bibliographic data." else: ficOk = False print nbTrouves, " patents corresponding to the request." print len( lstBrevets ), ' in file corresponding to the request. Retreiving associated bibliographic data' else: print "You prefer not to gather data. I hope you know what you do. At your own risk. P2N may crash" except: try: lstBrevets = LoadBiblioFile(ResultBiblioPath, ndf) nbActus = len(lstBrevets) ficOk = True except: lstBrevets = [ ] # gathering all again, I don t know if of serves the same ordered list of patents ficOknd = False nbTrouves = 1 STOP = False #else: # # print "Good, nothing to do" if not ficOk and GatherPatent: while len(lstBrevets) < nbTrouves and not STOP: if len(lstBrevets) + 25 < 2000:
#should set a working dir one upon a time... done it is temporPath ResultBiblioPath = configFile.ResultBiblioPath ResultPatentPath = configFile.ResultListPath ResultContentsPath = configFile.ResultContentsPath GlobalPath = configFile.GlobalPath # take request from BiblioPatent file if 'Description' + ndf in os.listdir( ResultBiblioPath ) or 'Description' + ndf.title() in os.listdir( ResultBiblioPath ): # NEW 12/12/15 new gatherer append data to pickle file in order to consume less memory data = LoadBiblioFile(ResultBiblioPath, ndf) requete = data['requete'] else: #Retrocompatibility print "please use Comptatibilizer" #if 'Fusion' in data.keys() data = dict() if GatherFamilly: #pdate needed for families if 'DescriptionFamilies' + ndf in os.listdir( ResultBiblioPath ) or 'DescriptionFamilies' + ndf.title() in os.listdir( ResultBiblioPath ): # NEW 12/12/15 new gatherer append data to pickle file in order to consume less memory data2 = LoadBiblioFile(ResultBiblioPath, 'Families' + ndf) nbFam = len(data2['brevets']) else: #Retrocompatibility print "please use Comptatibilizer"
def run():
    """Build an HTML gallery of patent drawing thumbnails.

    Loads the bibliographic pickle for each configured storage slot
    (plain corpus and, when family gathering is enabled, the 'Families'
    one), generates thumbnails for every numbered TIFF drawing found in
    the image output directory, and renders one gallery page per prefix.
    """
    # Bootstrap logging
    boot_logging()

    # Load configuration; bail out early when the feature is disabled
    cfg = LoadConfig()
    if not cfg.GatherImages:
        return

    # Pull the pieces of configuration this generator needs
    expression = cfg.requete
    storage_basedir = cfg.ResultBiblioPath
    storage_dirname = cfg.ndf
    output_path = cfg.ResultPathImages

    # One pass for the plain corpus, one for the families corpus
    prefixes = [""]
    if cfg.GatherFamilly:
        prefixes.append("Families")

    for prefix in prefixes:
        # Status message
        label = label_from_prefix(prefix)
        logger.info("Generating gallery of drawings for {}. ".format(label))

        # Storage slot combines prefix and DataDirectory,
        # e.g. "Lentille" vs. "FamiliesLentille"
        storage_name = prefix + storage_dirname
        biblio_file = LoadBiblioFile(storage_basedir, storage_name)

        # Collect one gallery entry per numbered drawing of each patent
        gallery = []
        for patent in biblio_file['brevets']:
            patent_label = get_patent_label(patent)
            logger.info('Processing patent {}'.format(patent_label))
            path_img_base = '{}//{}-{}.tiff'.format(output_path, patent_label, '{}')
            page = 1
            while True:
                candidate = path_img_base.format(page)
                if not os.path.exists(candidate):
                    break
                thumb, orig, tiff = generate_thumbnails(candidate)
                gallery.append({
                    "_id": '{}-{}'.format(patent_label, page),
                    'thumb': thumb,
                    'orig': orig,
                    'label': patent['title'],
                    'ipcr7': patent['IPCR7'],
                    'code': patent_label,
                    'tiff': tiff,
                })
                page += 1

        # Render gallery
        RenderTemplate(
            'ModeleImages.html',
            output_path + '/index' + prefix + '.html',
            request=expression.replace('"', ''),
            gallery=gallery,
            json=json.dumps(gallery),
        )
u'Citations', # the number of citations granted by the document #u'CitedBy', # the list of docs (patents) cititng this patent #'CitP', # the patents cited by this patent #'CitO' # the other docs cited by this patent ] #"citations" #filterFile = [fi for fi in os.listdir(ListBiblioPath) if fi.count('Expanded')] srcFile = [ fi.replace('Description', '') for fi in os.listdir(ListBiblioPath) ] for ndf in set(srcFile): if 'Description' + ndf in os.listdir( ListBiblioPath ): # NEW 12/12/15 new gatherer append data to pickle file in order to consume less memory DataBrevet = LoadBiblioFile(ListBiblioPath, ndf) print "\n> Hi! This is FormateExportPivotTable" else: #Retrocompatibility... prévious test is ugly: there is an issue with filename in lowercase (sometimes) print "please use Comptatibilizer" DataBrevet = LoadBiblioFile(ListBiblioPath, ndf) #so I try to laod it.... if isinstance(DataBrevet, collections.Mapping): #data = DataBrevet LstBrevet = DataBrevet['brevets'] if DataBrevet.has_key('number'): print "Found ", DataBrevet[ "number"], " patents! Formating into HMTL Pivot tables" else: print "Found ", len( DataBrevet["brevets"]
if IsEnableScript: Rep = configFile.ResultContentsPath Bib = configFile.ResultBiblioPath prefixes = [""] if GatherFamilly: prefixes.append("Families") for prefix in prefixes: ndf = prefix + configFile.ndf if 'Description' + ndf in os.listdir( Bib ): # NEW 12/12/15 new gatherer append data to pickle file in order to consume less memory DataBrevet = LoadBiblioFile(Bib, ndf) LstBrevet = DataBrevet['brevets'] else: #Retrocompatibility print "please use Comptatibilizer" try: os.makedirs(Rep + "//Carrot2") except: #directory exists pass temporar = GenereListeFichiers(Rep) for det in ['Abstract', 'Claims', 'Description']: ind = 0 for lang in ['FR', 'EN', 'UNK']: NomResult = lang + '_' + det.replace( 'Abstracts', ''
ResultPathImages = configFile.ResultPathImages P2NFamilly = configFile.GatherFamilly if IsEnableScript: ops_client = epo_ops.Client(key, secret) ops_client.accept_type = 'application/json' prefixes = [""] if P2NFamilly: prefixes.append("Families") for prefix in prefixes: ndf = prefix + configFile.ndf try: biblio_file = LoadBiblioFile(ResultBiblioPath, ndf) except IOError as ex: print 'WARNING: Could not load information for "{}". Not found / error: {}'.format(ndf, ex) patents = biblio_file['brevets'] metadata = {} for patent in patents: patent_label = get_patent_label(patent) pathes = [] path_json = '{}//{}.json'.format(ResultPathImages, patent_label) path_image = '{}//{}-{}.tiff'.format(ResultPathImages, patent_label, '{}') print "Processing patent {}".format(patent_label) js = get_images_meta(ops_client, patent_label, path_json) if not js: continue
ResultListPath = configFile.ResultListPath
ResultBiblioPath = configFile.ResultBiblioPath

if IsEnableScript:
    # Pre-load description texts used by the FreePlane export.
    LoadDescs()
    prefixes = [""]
    if P2NFamilly:
        prefixes.append("Families")
    for prefix in prefixes:
        ndf = prefix + configFile.ndf
        try:
            # NOTE(review): `fic` is opened but never read; LoadBiblioFile
            # performs its own file access. Presumably the open() only
            # probes that the file exists -- confirm.
            with open(ResultBiblioPath+'//'+ndf, 'r') as fic:
                DataBrevets1 = LoadBiblioFile(ResultBiblioPath, ndf)
                BrevetsTotal = str(len(DataBrevets1['brevets']))
        except:
            # NOTE(review): bare except, and execution continues with
            # DataBrevets1 possibly unbound -- the loop below would then
            # raise NameError. Confirm whether an exit/continue is wanted.
            print "Error: there are no data to generate de FreePlane file"
        # End of Load patent file
        # ### ugly code to patch classification extraction inconsistency
        for bre in DataBrevets1['brevets']:
            if isinstance(bre['classification'], list):
                if '' in bre['classification']:
                    bre['classification'].remove('')
            bre['IPCR11'] = bre['classification']
            # assumes each classification entry is indexable; ipc[0] is its
            # first element (first character if entries are plain strings)
            # -- TODO confirm against the gatherer's output format
            lstIPC = [ipc[0] for ipc in bre['classification']]
            for ipc in lstIPC: