コード例 #1
0
ファイル: batest.py プロジェクト: baturay/RML-AC
def main():
    # Takes in the file and parses into datum's.
    D = cData(sys.argv[1])
    # Starting points from the pickle or not.
    EmAlg = parseCommandLine(D, sys.argv)   
   
    prevCons = 0
    totalcons = 0
    nmiResult = evaluateEM_NMI(D, EmAlg)
    print "Initial nmi: ",nmiResult
    
    consobj = Cons.cCons(D)
    for numCons in range(1,len(D.data)/4,1):
        consobj.constype = Cons.cCons.eConsType.TripCenterChunk
        consobj.centerChunkSize = 0.2
        cons = consobj.tripCons(EmAlg.mGammas,numCons-prevCons)
        prevCons = numCons
        totalcons += len(cons)
        for i in cons:
            EmAlg.mCij[i[0]][i[1]] = i[2]
            EmAlg.mCij[i[1]][i[0]] = i[2] 
        EmAlg.EM(len(D.classlist))
        nmiresult = evaluateEM_NMI(D, EmAlg)
        print numCons, ",", nmiresult, ",", totalcons 
        if(nmiresult > 0.999 or len(D.data)==numCons):
            break
コード例 #2
0
ファイル: parts_exp.py プロジェクト: baturay/RML-AC
def scenario1(D, sFileStub):
    """Run unconstrained EM with random restarts and keep the best start per trial.

    For each of ``numOuterTrials`` outer trials, EM is run ``numInnerTrials``
    times with ``bPPC`` disabled and the run with the highest
    ``dEMLikelihood`` is kept.  One ``likelihood,NMI`` row per outer trial is
    written to ``<sFileStub>.scen1.results``.

    Parameters:
        D: data set object; ``D.classlist`` supplies the number of clusters.
        sFileStub: path prefix of the results file.

    Returns:
        A list with the best EM object of every outer trial, in trial order.
    """
    EMResults = []
    # The context manager closes the results file even if an EM run raises.
    with open(sFileStub + ".scen1.results", "w") as f:
        for iOutertrial in range(numOuterTrials):
            f.write("outertrial: %d\n" % iOutertrial)
            f.write("likelihood,NMI\n")

            bestEM = None
            bestLikelihood = 0
            for iRestart in range(numInnerTrials):
                EMAlg = EM.cEM(D)
                EMAlg.bPPC = False
                EMAlg.EM(len(D.classlist))
                # The first restart always becomes the incumbent, whatever
                # the sign of its likelihood.
                if bestEM is None or EMAlg.dEMLikelihood > bestLikelihood:
                    bestLikelihood = EMAlg.dEMLikelihood
                    bestEM = EMAlg

            EMResults.append(bestEM)

            # Report the NMI of the run whose likelihood is written next to
            # it (bestEM), not of whichever restart happened to run last.
            f.write("%f,%f\n" % (bestLikelihood,
                                 utils.evaluateEM_NMI(D, bestEM)))
            f.flush()
    return EMResults
コード例 #3
0
ファイル: parts_exp.py プロジェクト: baturay/RML-AC
def TripConsTest(D, sNum, EMStarts, fp):
    fp.write("trips,queries,cons,likelihood,NMI\n")
    for option in TripConsOptions.lOptions:
        optname = TripConsOptions.lOptionNames[option]
        fp.write(optname + "\n")
        for iOutertrial in range(numOuterTrials):
            print "scenario ", sNum, " options ", optname, " outertrial ", iOutertrial
            fp.write("outertrial: %d\n" % iOutertrial)

            em = copy.deepcopy(EMStarts[iOutertrial])
            em.bPPC = True

            prevTrips = 0
            totalCons = 0
            nmiResult = utils.evaluateEM_NMI(D, em)
            fp.write("Initial nmi: %f\n" % nmiResult)
            consobj = Cons.cCons(D)
            for numTrips in range(1,len(D.data)/4,1):
                if option == TripConsOptions.CenterChunkCons:
                    consobj.constype = Cons.cCons.eConsType.TripCenterChunk
                elif option == TripConsOptions.MidCons:
                    consobj.constype = Cons.cCons.eConsType.TripMids

                print em.mLikelihood_il

                cons = consobj.tripCons(em.mGammas,numTrips-prevTrips)
                prevTrips = numTrips
                totalCons += len(cons)
                for i in cons:
                    em.mCij[i[0]][i[1]] = i[2]
                    em.mCij[i[1]][i[0]] = i[2] 
                em.EM(len(D.classlist))

                nmiresult = utils.evaluateEM_NMI(D, em)
                fp.write("%d,%d,%d,%f,%f\n" % (numTrips,
                                              numTrips*14,
                                              totalCons,
                                              em.dEMLikelihood,
                                              nmiresult) )
                fp.flush()

                if (nmiresult > 0.999 or len(D.data)==numTrips):
                    break
コード例 #4
0
ファイル: Starts.py プロジェクト: baturay/RML-AC
    def goodInitial (self,D,em,emclusters,RepPts,fp):
        # Consistent means all the midpoints are same with the center.
        constraints = []
        iters = 0

        indEMClusters = range(len(emclusters))
        lResetExclusions = []
        numUserQueries = 0
        for cl in emclusters:
            print ([D.data[i.index].cl for i in cl.midpoints],D.data[cl.center.index].cl)," ",cl.center.index
         
        while len(indEMClusters) != 0 and iters < 5:
            
            resetCenters = []
            
            for ind in indEMClusters[:]:
                cl = emclusters[ind]
                if(len(cl.midpoints) <= 1):
                    resetCenters.append(ind)
                    continue

                # simulate feedback from real classes
                realpoints = [D.data[i.index] for i in cl.midpoints]
                realcenter = D.data[cl.center.index]
                numUserQueries += len(realpoints) + 1
                # points in realpoints s.t. their real class is same as center
                rightclass = filter(lambda x: x.cl==realcenter.cl,realpoints)
                rightclass.append(realcenter)
                wrongclass = filter(lambda x: x.cl!=realcenter.cl,realpoints)

                # All the leftovers...
                if len(wrongclass) == 0:
                    indEMClusters.remove(ind)
                    lResetExclusions.extend( [x.index for x in rightclass] )
                else:
                    resetCenters.append(ind)
                    
                # Cross constraints between right and wrong classes.
                for i in rightclass:
                    for j in realpoints:
                        if j in wrongclass:
                            constraints.append([i.index,j.index,-2])
                        elif j!= i:
                            constraints.append([i.index,j.index,2])
                for i in constraints:
                    em.mCij[i[0]][i[1]] = i[2]
                    em.mCij[i[1]][i[0]] = i[2]

            # If all classes are not right, restart.
            em.resetSomeCenters(em.lInitialCenters,resetCenters,lResetExclusions)
            em.EM(len(emclusters))
            emclusters = RepPts.createClusters(em)
            RepPts.repPoints(em, emclusters)
            print "goodInitial iter nmi: ", evaluateEM_NMI(D,em)," ",iters
            iters += 1

            # queries,cons,likelihood,NMI
            maybeWrite(fp,
                       "%d,%d,%f,%f\n" % (numUserQueries,
                                          len(constraints),
                                          em.dEMLikelihood,
                                          evaluateEM_NMI(D,em) ) )
            print indEMClusters
            for cl in emclusters:
                print ([D.data[i.index].cl for i in cl.midpoints],D.data[cl.center.index].cl)," ",cl.center.index
            
        
        return em
コード例 #5
0
ファイル: LL_restart_test.py プロジェクト: baturay/RML-AC
import numpy
import EM
import cData
import utils

# run EM several times and get the likelihood
for iRestart in range(20):
    D = cData.cData("data/winenorm3_pyre.csv")
    # D = cData.cData("data/normvert.csv")
    M = EM.cEM(D)
    M.bPPC = False

    M.EM(3)
    print M.dEMLikelihood,
    print " nmi: ",
    print utils.evaluateEM_NMI(D, M)