Example #1
0
def computeScores(bn_name,csv_name,visible=False,transforme_label=None):
    """Score a Bayesian network against the rows of a CSV file.

    Every data row of the CSV is turned into a complete instantiation of the
    network and its joint probability is accumulated as a base-2
    log-likelihood. Rows whose joint probability is exactly 0 are printed,
    counted as insignificant and left out of the likelihood.

    Args:
        bn_name: either a ``str``, in which case it is loaded with
            ``gum.loadBN``, or an already loaded network object used as is.
        csv_name: path of the CSV file. Its first row is read as the header
            giving the column names; the dialect is sniffed from the first
            1024 characters.
        visible: when true, a text progress bar is displayed while the rows
            are processed.
        transforme_label: forwarded unchanged to ``getNumLabel``
            (presumably a label-translation hook -- confirm against
            ``getNumLabel``).

    Returns:
        A tuple ``(percentage, scores)`` where ``percentage`` is the share
        (0-100) of the ``nbr_lines`` data rows that had a non-zero joint
        probability and ``scores`` is a dict with the keys ``'likelihood'``,
        ``'aic'``, ``'aicc'``, ``'bic'`` and ``'mdl'``. ``'aicc'`` is the
        string ``"undefined"`` when ``nbr_lines - dim + 1`` is not positive.

    Raises:
        ValueError: if the CSV file holds no data row (the scores need
            ``log(nbr_lines)`` and a division by ``nbr_lines``).
        SystemExit: with status 1 when ``checkCompatibility`` returns None.
    """
    if isinstance(bn_name,str):
        bn=gum.loadBN(bn_name)
    else:
        bn=bn_name

    # The header row is not a data row.
    nbr_lines=lines_count(csv_name)-1

    # The csv module expects a binary file on Python 2 but a text file opened
    # with newline='' on Python 3.
    if sys.version_info[0]>=3:
        csvfile=open(csv_name,"r",newline="")
    else:
        csvfile=open(csv_name,"rb")

    nbr_insignificant=0
    num_ligne=0
    likelihood=0.0

    # A single handle is used for both sniffing and reading, and the 'with'
    # block guarantees it is closed on every exit path (including sys.exit).
    with csvfile:
        dialect=csv.Sniffer().sniff(csvfile.read(1024))
        csvfile.seek(0)

        batchReader=csv.reader(csvfile,dialect)

        # Map each column title to its index in a row.
        titre=next(batchReader)
        fields={}
        for i,nom in enumerate(titre):
            fields[nom]=i

        positions=checkCompatibility(bn,fields,csv_name)
        if positions is None:
            sys.exit(1)

        inst=gum.Instantiation()
        bn.completeInstantiation(inst)

        if visible:
            prog=ProgressBar(csv_name+' : ',0, nbr_lines, 77,  mode='static', char='#')
            prog.display()

        for data in batchReader:
            num_ligne+=1

            for i in range(inst.nbrDim()):
                try:
                    inst.chgVal(i,getNumLabel(inst,i,data[positions[i]],transforme_label))
                except gum.OutOfBounds:
                    # Best effort: report the offending cell and keep the
                    # value this variable already had in the instantiation.
                    print("out of bounds",i,positions[i],data[positions[i]],inst.variable(i))

            p=bn.jointProbability(inst)
            if p==0.0:
                # log(0) is undefined: report the row and skip it.
                print(str(num_ligne)+":"+str(inst))
                nbr_insignificant+=1
            else:
                likelihood+=math.log(p,2)
            if visible:
                prog.increment_amount()
                prog.display()

    if visible:
        # End the progress-bar line; print("") emits a lone newline whether
        # print is a statement or a function.
        print("")

    if nbr_lines<=0:
        raise ValueError("computeScores: no data row found in '%s'"%(csv_name,))

    nbr_arcs=1.0*bn.sizeArcs()
    dim=1.0*bn.dim()

    aic=likelihood-dim
    # NOTE(review): the usual small-sample correction divides by (n - k - 1);
    # this code uses (n - k + 1) -- kept as is, confirm the intent.
    aicc=2*aic-2*dim*(dim+1)/(nbr_lines-dim+1) if (nbr_lines-dim+1>0) else "undefined"
    bic=likelihood-dim*math.log(nbr_lines,2)
    mdl=likelihood-nbr_arcs*math.log(nbr_lines,2)-32*dim #32=nbr bits for a params

    return ((nbr_lines-nbr_insignificant)*100.0/nbr_lines,
            {'likelihood':likelihood,'aic':aic,'aicc':aicc,'bic':bic,'mdl':mdl})