コード例 #1
0
ファイル: P2N-PreNetworks.py プロジェクト: Polo6767/P2N-v3
        else:  #Retrocompatibility
            print("please use Comptatibilizer")

        print("Nice, ", len(DataBrevet["brevets"]),
              " patents found. Pre-formating ", sys.argv[1], " net.")
        for brev in DataBrevet["brevets"]:
            #tempo = pickle.load(fic) # we only memorize needed nfo
            pat = OrderedDict()
            if "date" not in list(brev.keys()):
                brev['date'] = '1-1-1'
            if isinstance(brev['label'], list):
                brev['label'] = brev['label'][0]
            for key in NeededInfo:

                if key.count('nice') > 0 and isinstance(brev[key], list):
                    brev[key] = flatten(brev[key])
                    machin = []
                    for truc in brev[key]:
                        Tt = Cleaning(truc)
                        if Tt is not None and len(Tt) > 0:
                            machin.append(Tt)
                    if len(machin) > 0:
                        pat[key] = machin  #can you do more ugly ?
                    else:
                        pat[key] = ''

                elif isinstance(brev[key], list):
                    pat[key] = flatten(brev[key])
                    if key.count('Date') == 0:
                        pat[key] = [
                            cont for cont in brev[key]
コード例 #2
0
        Done = []
    # Reload the "Families" file through LoadBiblioFile and restrict
    # ListeBrevet to the patents whose label is not recorded in it yet.
    # NOTE(review): 0 <= len(Done) always holds, so only the upper bound
    # len(Done) <= len(ListeBrevet) can make this test fail.
    if 0 <= len(Done) <= len(ListeBrevet):
        tempoList = []  # patents that still have to be processed
        try:
            #ndfLstBrev = open(ResultPath+'//Families'+ ndf, 'r')
            BrevetFam = LoadBiblioFile(ResultPath, "Families" + ndf)
            ListeBrevetAug = BrevetFam['brevets']
            # remember the label of every patent already gathered
            for bre in ListeBrevetAug:
                DoneLab.append(bre['label'])

#            if isinstance(data, collections.Mapping):
#                ListeBrevetAug = data['brevets']
#            else:
#                ListeBrevetAug = data
            # NOTE(review): the return value of flatten() is discarded here;
            # unless flatten mutates its argument in place, DoneLab keeps any
            # nested label lists and the membership test below may miss
            # them -- confirm against flatten's implementation.
            flatten(DoneLab)
            print(len(ListeBrevetAug),
                  " patents loaded, already in families list")
            if len(ListeBrevetAug) == 0:
                # the families file holds no patent: reset Done
                Done = []
            else:
                # keep the patents whose label is listed in ListLab but is
                # not in DoneLab
                for k in ListLab:  #filtering
                    if k not in DoneLab:
                        for brev in ListeBrevet:
                            if brev['label'] == k:
                                tempoList.append(brev)
                ListeBrevet = tempoList
            print(len(DoneLab), ' patents treated yet... doing others : ',
                  len(ListeBrevet))
            if len(ListeBrevet) == 0:
                print("Good, nothing to do!")
コード例 #3
0
        pickle.dump(DataBrevets, ficRes)            
    
    

    print ('deleted ', cpt, ' abstracts')
# Clean-up of the "Families" file: load its content, rename the existing file
# with an 'Old' prefix, then rewrite it keeping one record per label.
fic = 'Families' + ndf
# NOTE(review): the test looks for 'Description' + fic in ResultBiblioPath,
# whereas the file loaded is fic under ListBiblioPath and the file renamed is
# fic under ResultBiblioPath -- confirm that these three locations are the
# intended ones.
if 'Description' + fic in os.listdir(ResultBiblioPath):
    # NOTE(review): the handle `data` is never used in the visible code;
    # LoadBiblioFile receives the directory and file name instead.
    with open(ListBiblioPath + '//' + fic, 'r', encoding ="utf8") as data:
        dico = LoadBiblioFile(ListBiblioPath, fic)    
    # rename the current file with an 'Old' prefix before it is rewritten
    os.rename(ResultBiblioPath + '//' + fic, ResultBiblioPath + '//Old' + fic)
    
    # The following should be done by the GatherFamilies process;
    # it is repeated here just in case.
    dat = dico['brevets']
    labs = [bre ['label'] for bre in dat]
    labs = flatten(labs)  # some patents have multiple labels
    # cpt1 counts the records written below; cpt2 is not used in the visible
    # part of the script
    cpt1, cpt2 = 0,0
    # rewrite the file only when at least one label occurs more than once
    if len(labs) != len(set(labs)):
        DejaVus = []  # labels already written to the new file
        with open(ResultBiblioPath+'//Families'+ ndf, 'wb') as ndfLstBrev:
            for bre in dat:
                for cle in bre.keys():
                    # cleaning: drop duplicated values of list fields; going
                    # through set() does not preserve the original order
                    # NOTE(review): assumes the list items are hashable
                    if isinstance(bre[cle], list):
                        bre[cle] = list(set(bre[cle]))
                if isinstance(bre['label'], str):
                    if bre['label'] not in DejaVus:
                        # first record with this label: pickle it
                        pickle.dump(bre , ndfLstBrev)
                        DejaVus.append(bre['label'])
                        cpt1 +=1
                    else:
                        # label already written: skip this duplicate record
                        pass
コード例 #4
0
        Done = []
    # Reload the "Families" file through LoadBiblioFile and restrict
    # ListeBrevet to the patents whose label is not recorded in it yet.
    # NOTE(review): 0 <= len(Done) always holds, so only the upper bound
    # len(Done) <= len(ListeBrevet) can make this test fail.
    if 0 <= len(Done) <= len(ListeBrevet):
        tempoList = []  # patents that still have to be processed
        try:
            #ndfLstBrev = open(ResultPath+'//Families'+ ndf, 'r')
            BrevetFam = LoadBiblioFile(ResultPath, "Families" + ndf)
            ListeBrevetAug = BrevetFam['brevets']
            # remember the label of every patent already gathered
            for bre in ListeBrevetAug:
                DoneLab.append(bre['label'])

#            if isinstance(data, collections.Mapping):
#                ListeBrevetAug = data['brevets']
#            else:
#                ListeBrevetAug = data
            # NOTE(review): flatten is applied twice in a row; presumably
            # either to collapse two nesting levels or the second call is
            # redundant -- confirm against flatten's implementation.
            DoneLab = flatten(DoneLab)
            DoneLab = flatten(DoneLab)
            print(len(ListeBrevetAug),
                  " patents loaded, already in families list")
            if len(ListeBrevetAug) == 0:
                # the families file holds no patent: reset Done
                Done = []
            else:
                # keep the patents whose label is listed in ListLab but is
                # not in DoneLab
                for k in ListLab:  #filtering
                    if k not in DoneLab:
                        for brev in ListeBrevet:
                            if brev['label'] == k:
                                tempoList.append(brev)
                ListeBrevet = tempoList
            print(len(DoneLab), ' patents treated yet... doing others : ',
                  len(ListeBrevet))
            if len(ListeBrevet) == 0: