dateMini = datetime.date(3000,1,1) dateMaxi = datetime.date(1000,1,1) NeededInfo .extend(mixNet) # list of needed field for building the net G1 = nx.DiGraph() # dynamic network for Gephi attr_dict = dict() # attributes for the net G2 = nx.DiGraph() # flat net for gexf.js may be it is possible to use previous instead of this one... print network, ": loading data with ", " and ".join(mixNet), " fields." with open (BiblioPath+'//'+ndf, 'r') as fic: DataBrevet = pickle.load(fic) for brev in DataBrevet["brevets"]: #tempo = pickle.load(fic) # we only memorize needed nfo pat = OrderedDict () for key in NeededInfo: if isinstance(brev[key], list): brev[key]= flatten(brev[key]) brev[key]= [cont for cont in brev[key] if (cont !='empty' or cont != 'Empty' or cont !='')] elif isinstance (brev[key], unicode) or isinstance (brev[key], str): brev[key].replace('empty', '') pat[key] = brev[key] for flatPat in DecoupeOnTheFly(pat, []): if flatPat not in ListeBrevet: ListeBrevet.append(flatPat) if pat['label'] not in Patents: Patents.add(pat['label']) for lab in Patents: temp = [] for bre in [brev for brev in ListeBrevet if brev['label']==lab]: for cat in mixNet:
tempFiltered = [] LabList = [pat['label'] for pat in temp] for pat in LabList: tempoPat = [ patent for patent in temp if patent['label'] == pat ] # fusionning several patents wwith same label # OPS model seems to save one entry for several status documents... # in P2N model, label is unique key... so properties are lists.. this is the jobs of update function hereafter tempoRar = dict() for pate in tempoPat: tempoRar = Update(pate, tempoRar) for clef in tempoRar.keys(): if isinstance(tempoRar[clef], list): tempoRar[clef] = flatten(tempoRar[clef]) tempo = [] for contenu in tempoRar[clef]: if contenu is not None: tempo.append(contenu) else: if '' not in tempo and len( tempo) == 0: tempo.append('') tempoRar[clef] = tempo else: pass #should be good here if pat not in YetIn: tempFiltered.append(dictCleaner(tempoRar)) YetIn.append(pat) else:
print "please use Comptatibilizer" print "Nice, ", len( DataBrevet["brevets"] ), " patents found. Pre-formating ", sys.argv[1], " net." for brev in DataBrevet["brevets"]: #tempo = pickle.load(fic) # we only memorize needed nfo pat = OrderedDict() if "date" not in brev.keys(): brev['date'] = '1-1-1' if isinstance(brev['label'], list): brev['label'] = brev['label'][0] for key in NeededInfo: if key.count('nice') > 0 and isinstance(brev[key], list): brev[key] = flatten(brev[key]) machin = [] for truc in brev[key]: Tt = Cleaning(truc) if Tt is not None and len(Tt) > 0: machin.append(Tt) if len(machin) > 0: pat[key] = machin #can you do more ugly ? else: pat[key] = '' elif isinstance(brev[key], list): pat[key] = flatten(brev[key]) if key.count('Date') == 0: pat[key] = [ cont for cont in brev[key]
dateMaxi = datetime.date(1000, 1, 1) NeededInfo.extend(mixNet) # list of needed field for building the net G1 = nx.DiGraph() # dynamic network for Gephi attr_dict = dict() # attributes for the net G2 = nx.DiGraph( ) # flat net for gexf.js may be it is possible to use previous instead of this one... print network, ": loading data with ", " and ".join(mixNet), " fields." with open(BiblioPath + '//' + ndf, 'r') as fic: DataBrevet = pickle.load(fic) for brev in DataBrevet["brevets"]: #tempo = pickle.load(fic) # we only memorize needed nfo pat = OrderedDict() for key in NeededInfo: if isinstance(brev[key], list): brev[key] = flatten(brev[key]) brev[key] = [ cont for cont in brev[key] if (cont != 'empty' or cont != 'Empty' or cont != '') ] elif isinstance(brev[key], unicode) or isinstance( brev[key], str): brev[key].replace('empty', '') pat[key] = brev[key] for flatPat in DecoupeOnTheFly(pat, []): if flatPat not in ListeBrevet: ListeBrevet.append(flatPat) if pat['label'] not in Patents: Patents.add(pat['label'])