def _open_csv_for_reading(csv_name):
    """Open *csv_name* in the mode the ``csv`` module expects on this Python."""
    if sys.version_info[0] < 3:
        # Python 2's csv module wants a binary-mode file.
        return open(csv_name, "rb")
    # Python 3's csv module wants text mode with newline translation disabled.
    return open(csv_name, "r", newline="")


def computeScores(bn_name, csv_name, visible=False, transforme_label=None):
    """Score a Bayesian network against the rows of a CSV file.

    Every data row of the CSV is turned into an instantiation of the
    network's variables; the base-2 log of its joint probability is added to
    the log-likelihood. Rows whose joint probability is exactly 0 are printed
    and counted apart instead of contributing to the likelihood.

    Args:
        bn_name: either a ``str`` (passed to ``gum.loadBN``) or an already
            loaded network object, used as is.
        csv_name: path of the CSV file; its first row is the header.
        visible: when true, display a progress bar while reading the rows.
        transforme_label: forwarded unchanged to ``getNumLabel``.

    Returns:
        A tuple ``(percent, scores)`` where ``percent`` is the percentage of
        data rows with a non-zero joint probability and ``scores`` is a dict
        with the keys ``'likelihood'``, ``'aic'``, ``'aicc'``, ``'bic'`` and
        ``'mdl'``. ``scores['aicc']`` is the string ``"undefined"`` when
        ``nbr_lines - dim + 1`` is not positive.

    Raises:
        ValueError: if the CSV file contains no data row.
        SystemExit: (exit status 1) if ``checkCompatibility`` returns ``None``.
    """
    bn = gum.loadBN(bn_name) if isinstance(bn_name, str) else bn_name

    # The header row is not a data row.
    nbr_lines = lines_count(csv_name) - 1

    nbr_insignificant = 0
    likelihood = 0.0

    # A single handle serves both dialect sniffing and reading, and the
    # context manager guarantees it is closed (even on sys.exit below).
    with _open_csv_for_reading(csv_name) as csvfile:
        dialect = csv.Sniffer().sniff(csvfile.read(1024))
        csvfile.seek(0)
        batchReader = csv.reader(csvfile, dialect)

        titre = next(batchReader)
        # Map each column name to its index in a row.
        fields = {nom: i for i, nom in enumerate(titre)}

        positions = checkCompatibility(bn, fields, csv_name)
        if positions is None:
            sys.exit(1)

        inst = gum.Instantiation()
        bn.completeInstantiation(inst)

        if visible:
            prog = ProgressBar(csv_name + ' : ', 0, nbr_lines, 77,
                               mode='static', char='#')
            prog.display()

        for num_ligne, data in enumerate(batchReader, 1):
            for i in range(inst.nbrDim()):
                try:
                    inst.chgVal(i, getNumLabel(inst, i, data[positions[i]],
                                               transforme_label))
                except gum.OutOfBounds:
                    # Best effort: report the offending value and keep going;
                    # variable i keeps whatever value it had before.
                    print("out of bounds", i, positions[i],
                          data[positions[i]], inst.variable(i))

            p = bn.jointProbability(inst)
            if p == 0.0:
                print(str(num_ligne) + ":" + str(inst))
                nbr_insignificant += 1
            else:
                likelihood += math.log(p, 2)

            if visible:
                prog.increment_amount()
                prog.display()

    if visible:
        # End the progress-bar line.
        print("")

    if nbr_lines <= 0:
        # Without this guard the failure surfaces as an opaque
        # "math domain error" from math.log below.
        raise ValueError("no data row found in " + str(csv_name))

    nbr_arcs = float(bn.sizeArcs())
    dim = float(bn.dim())
    log_nbr_lines = math.log(nbr_lines, 2)

    aic = likelihood - dim
    # NOTE(review): the usual AICc correction divides by (n - k - 1); this
    # code uses (n - k + 1). Left untouched so existing results do not
    # change -- confirm the intended formula.
    if nbr_lines - dim + 1 > 0:
        aicc = 2 * aic - 2 * dim * (dim + 1) / (nbr_lines - dim + 1)
    else:
        aicc = "undefined"
    bic = likelihood - dim * log_nbr_lines
    mdl = likelihood - nbr_arcs * log_nbr_lines - 32 * dim  # 32=nbr bits for a params

    return ((nbr_lines - nbr_insignificant) * 100.0 / nbr_lines,
            {'likelihood': likelihood, 'aic': aic, 'aicc': aicc,
             'bic': bic, 'mdl': mdl})