else: #Retrocompatibility print("please use Comptatibilizer") print("Nice, ", len(DataBrevet["brevets"]), " patents found. Pre-formating ", sys.argv[1], " net.") for brev in DataBrevet["brevets"]: #tempo = pickle.load(fic) # we only memorize needed nfo pat = OrderedDict() if "date" not in list(brev.keys()): brev['date'] = '1-1-1' if isinstance(brev['label'], list): brev['label'] = brev['label'][0] for key in NeededInfo: if key.count('nice') > 0 and isinstance(brev[key], list): brev[key] = flatten(brev[key]) machin = [] for truc in brev[key]: Tt = Cleaning(truc) if Tt is not None and len(Tt) > 0: machin.append(Tt) if len(machin) > 0: pat[key] = machin #can you do more ugly ? else: pat[key] = '' elif isinstance(brev[key], list): pat[key] = flatten(brev[key]) if key.count('Date') == 0: pat[key] = [ cont for cont in brev[key]
Done = [] if 0 <= len(Done) <= len(ListeBrevet): tempoList = [] try: #ndfLstBrev = open(ResultPath+'//Families'+ ndf, 'r') BrevetFam = LoadBiblioFile(ResultPath, "Families" + ndf) ListeBrevetAug = BrevetFam['brevets'] #adding already gathered for bre in ListeBrevetAug: DoneLab.append(bre['label']) # if isinstance(data, collections.Mapping): # ListeBrevetAug = data['brevets'] # else: # ListeBrevetAug = data flatten(DoneLab) print(len(ListeBrevetAug), " patents loaded, already in families list") if len(ListeBrevetAug) == 0: Done = [] else: for k in ListLab: #filtering if k not in DoneLab: for brev in ListeBrevet: if brev['label'] == k: tempoList.append(brev) ListeBrevet = tempoList print(len(DoneLab), ' patents treated yet... doing others : ', len(ListeBrevet)) if len(ListeBrevet) == 0: print("Good, nothing to do!")
# Purpose: write DataBrevets to ficRes, report the abstract count, then reload
# the "Families" file and rewrite it without repeated labels.
# NOTE(review): this fragment was recovered from a whitespace-collapsed line;
# the nesting below is inferred from syntax -- confirm against the script.
# ficRes, DataBrevets, cpt and ndf are defined before the visible fragment.
pickle.dump(DataBrevets, ficRes)
print ('deleted ', cpt, ' abstracts')
fic = 'Families' + ndf
# NOTE(review): the test looks for 'Description' + fic in the directory, but
# the open/load/rename below all use fic without that prefix -- confirm.
if 'Description' + fic in os.listdir(ResultBiblioPath):
    # The handle `data` is not read in the visible code; LoadBiblioFile is
    # given the directory and file name instead.
    # NOTE(review): ListBiblioPath is used here, ResultBiblioPath everywhere
    # else in this fragment -- confirm both names are intended.
    with open(ListBiblioPath + '//' + fic, 'r', encoding ="utf8") as data:
        dico = LoadBiblioFile(ListBiblioPath, fic)
    # Keep the previous file under an 'Old' prefix before it is rewritten.
    os.rename(ResultBiblioPath + '//' + fic, ResultBiblioPath + '//Old' + fic)
    #the following should be done by GatherFamilies process.
    # just in case...
    dat = dico['brevets']
    labs = [bre ['label'] for bre in dat]
    labs = flatten(labs) # some patents have multiples labels
    # cpt1 counts the records written below; cpt2 is not updated in the
    # visible part of the fragment.
    cpt1, cpt2 = 0,0
    # Rewrite the file only when at least one label occurs more than once.
    if len(labs) != len(set(labs)):
        DejaVus = []  # labels already written to the new file
        with open(ResultBiblioPath+'//Families'+ ndf, 'wb') as ndfLstBrev:
            for bre in dat:
                for cle in bre.keys():
                    if isinstance(bre[cle], list): #cleaning
                        # set() drops duplicates but does not keep the order,
                        # and needs hashable elements.
                        bre[cle] = list(set(bre[cle]))
                if isinstance(bre['label'], str):
                    if bre['label'] not in DejaVus:
                        # One pickle record per patent, appended to the file.
                        pickle.dump(bre , ndfLstBrev)
                        DejaVus.append(bre['label'])
                        cpt1 +=1
                    else:
                        # Label already written: the repeated record is dropped.
                        pass
Done = [] if 0 <= len(Done) <= len(ListeBrevet): tempoList = [] try: #ndfLstBrev = open(ResultPath+'//Families'+ ndf, 'r') BrevetFam = LoadBiblioFile(ResultPath, "Families" + ndf) ListeBrevetAug = BrevetFam['brevets'] #adding already gathered for bre in ListeBrevetAug: DoneLab.append(bre['label']) # if isinstance(data, collections.Mapping): # ListeBrevetAug = data['brevets'] # else: # ListeBrevetAug = data DoneLab = flatten(DoneLab) DoneLab = flatten(DoneLab) print(len(ListeBrevetAug), " patents loaded, already in families list") if len(ListeBrevetAug) == 0: Done = [] else: for k in ListLab: #filtering if k not in DoneLab: for brev in ListeBrevet: if brev['label'] == k: tempoList.append(brev) ListeBrevet = tempoList print(len(DoneLab), ' patents treated yet... doing others : ', len(ListeBrevet)) if len(ListeBrevet) == 0: