Example #1
 def impdata(self):
     # each whitespace-delimited row becomes a cData record built from
     # column 0, columns 1 onward, and column 6
     self.basedata = []
     f = open(self.filebase, 'r')
     for line in f.readlines():
         data = line.split(' ')
         d = cData.cData(data[0], data[1:], data[6])
         self.basedata.append(d)
     f.close()
     print "base data len : ", len(self.basedata)

     # the real data file uses the same row format
     self.realdata = []
     f = open(self.filereal, 'r')
     for line in f.readlines():
         data = line.split(' ')
         d = cData.cData(data[0], data[1:], data[6])
         self.realdata.append(d)
     f.close()
     print "real data len : ", len(self.realdata)
Example #2
 def gencode(self):
     # start from the initial code [1, 2, ..., count]
     d = []
     for i in range(self.count):
         d.append(i + 1)
     # keep asking getnext() for the next code until the sequence is exhausted
     while True:
         d = self.getnext(d)
         if d is not None:
             cd = cData.cData(0, d, 0)
             self.total += 1
             yield cd
         else:
             break
     # a final None tells the consumer the sequence has ended
     yield None
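Because the generator signals exhaustion by yielding a final None rather than simply returning, a caller has to check for it; a minimal usage sketch (gen_obj and process are hypothetical names):

for cd in gen_obj.gencode():
    if cd is None:
        break        # the trailing None marks the end of the code sequence
    process(cd)      # hypothetical per-item handler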
Example #3
# After several rounds of having the user provide
# feedback on starting points, we proceed with
# random pairwise constraints and watch to see our
# NMI increase

import pickle
import EM
import cData
import sys

if len(sys.argv) < 2:
    print "provide filename and optionally a filename for the pickle of centers"
    exit(1)

# use the same code for getting initial points as baturay did
D = cData.cData(sys.argv[1])
D.setType("2", "random")
EmAlg = EM.cEM(D)
EmAlg.EM(len(D.classlist))
EmAlg.bPPC = True 
# create clusters based on what EM guessed
D.createClusters(EmAlg)
# find the outer points and the midpoints and assign them to emclusters
D.repPoints(EmAlg)
# give the algorithm good initial points to start from
EmAlg = D.goodInitial(EmAlg)

print "pickling starting position to: ",
picklefname = "pickles/"+sys.argv[1].split('/')[-1]+".pickle"
if len(sys.argv) > 2:
    picklefname = sys.argv[2]
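The constraint phase described in the header comment is cut off above; a rough sketch of how it might continue, reusing poscons/pairCons/mCij from Example #6 and utils.evaluateEM_NMI from Example #7 (the batch sizes are made up):

import utils

# add random pairwise constraints in batches and watch the NMI
D.poscons = [(i, j) for i in range(len(D.data)) for j in range(len(D.data))]
for numCons in [100, 200, 500]:    # hypothetical batch sizes
    cons = D.pairCons(numCons)
    for c in cons:
        EmAlg.mCij[c[0]][c[1]] = c[2]
        EmAlg.mCij[c[1]][c[0]] = c[2]
    EmAlg.EM(len(D.classlist))
    print numCons, "constraints, nmi:", utils.evaluateEM_NMI(D, EmAlg)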
Example #4
import EM
import cData

def run():
    # plain EM with 3 clusters on the wine data
    D = cData.cData("data/winenorm3_pyre.csv")
    E = EM.cEM(D)
    E.EM(3)
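To see how well the three clusters recover the labels, the fit could be scored with the NMI helper used in Example #7; a usage sketch, not part of the original run():

import utils

D = cData.cData("data/winenorm3_pyre.csv")
E = EM.cEM(D)
E.EM(3)
print "likelihood: ", E.dEMLikelihood
print "nmi: ", utils.evaluateEM_NMI(D, E)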
Example #5
import EM
import cData
D = cData.cData("data/DATASET3_trim.csv")
# load full constraint set into cData

# run PPC with same initial clusters with increasing number constraints
M = EM.cEM(D)

numCent = 6

for numCons in [500]:
    
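The loop body is missing above; a sketch of what one iteration might do, built only from calls that appear in the other examples (constraint filling as in Example #6, scoring as in Example #7):

import utils

# one unconstrained run first, mirroring Example #6, before constraints are added
M.EM(numCent)

for numCons in [500]:
    # sample numCons random pairwise constraints and feed them to PPC-EM
    D.poscons = [(i, j) for i in range(len(D.data)) for j in range(len(D.data))]
    cons = D.pairCons(numCons)
    for c in cons:
        M.mCij[c[0]][c[1]] = c[2]
        M.mCij[c[1]][c[0]] = c[2]
    M.bPPC = True
    M.EM(numCent)
    print numCons, "constraints, nmi:", utils.evaluateEM_NMI(D, M)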
Example #6
bNewCenters = False
if len(sys.argv) > 2:
    # check for 'new centers' bypass
    if sys.argv[2] == "__NEW__":
        bNewCenters = True
    else:
        lICenters = pickle.load(open(sys.argv[2]))
else:
    # check for default initial centers pickle
    sfDefltPkl = sfData + ".icenters.pickle"
    if os.access(sfDefltPkl, os.R_OK):
        lICenters = pickle.load(open(sfDefltPkl))
    else:
        bNewCenters = True

D = cData.cData(sfData)
PPC = EM.cEM(D)
if bNewCenters:
    #PPC = D.emRestarts(10)
    #PPC.EM(len(D.data))
    PPC.EM(3)
    
    print PPC.lInitialCenters
    lICenters = PPC.lInitialCenters[:]

# consider every ordered pair of points as a candidate constraint,
# then sample 500 of them as pairwise constraints
D.poscons = [(i, j) for i in range(len(D.data)) for j in range(len(D.data))]
cons = D.pairCons(500)
for i in cons:
    # each constraint is (point a, point b, value); store it symmetrically
    PPC.mCij[i[0]][i[1]] = i[2]
    PPC.mCij[i[1]][i[0]] = i[2]
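When bNewCenters is set, the freshly computed centers could also be written to the default pickle checked at the top, so later runs reuse them; a sketch, not necessarily what the original script does:

if bNewCenters:
    # save the new initial centers under the default name checked above
    pf = open(sfData + ".icenters.pickle", "w")
    pickle.dump(lICenters, pf)
    pf.close()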
Example #7
import numpy
import EM
import cData
import utils

# run EM several times and get the likelihood
for iRestart in range(20):
    D = cData.cData("data/winenorm3_pyre.csv")
    # D = cData.cData("data/normvert.csv")
    M = EM.cEM(D)
    M.bPPC = False

    M.EM(3)
    print M.dEMLikelihood,
    print " nmi: ",
    print utils.evaluateEM_NMI(D, M)
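To keep the best of the 20 restarts instead of only printing them, the loop could track the model with the highest likelihood; a sketch using only attributes that appear in these examples:

import EM
import cData
import utils

bestM, bestLL = None, None
for iRestart in range(20):
    D = cData.cData("data/winenorm3_pyre.csv")
    M = EM.cEM(D)
    M.bPPC = False
    M.EM(3)
    # remember the restart with the largest likelihood
    if bestLL is None or M.dEMLikelihood > bestLL:
        bestM, bestLL = M, M.dEMLikelihood

print "best likelihood: ", bestLL
print "best nmi: ", utils.evaluateEM_NMI(bestM.mData, bestM)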
Example #8
    sCommands = "s1_s2_s3_s4"

    argc = len(sys.argv)
    if argc >= 2:
        sNickname = sys.argv[1]
    if argc >= 3: # includes filename stub of input
        lDataFiles = [ sys.argv[2] ]
    if argc == 4:
        sCommands = sys.argv[3]
    if argc < 2 or argc > 4:
        errmsg()
        exit(1)

    lCommands = sCommands.split("_")
    for dfname in lDataFiles:
        D = cData.cData("data/" + dfname + ".csv")

        # scenario 1
        if "s1" in lCommands:
            print "running scenario 1"
            EMStartsS1 = scenario1(D, "results/" + sNickname + "." + dfname)
            print "pickling results of scenario 1"
            pf = open("results/" + sNickname + "." + dfname + ".scen1.pickle", "w")
            pickle.dump(EMStartsS1, pf)
            pf.close()

        # load results from scenario 1 pickle
        if "p1" in lCommands:
            print "loading scenario 1 results from pickle"
            pf = open("results/" + sNickname + "." + dfname + ".scen1.pickle")
            EMStartsS1 = pickle.load(pf)
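errmsg() is referenced but not shown; given the argument handling above, it presumably prints something like this (a hypothetical reconstruction, not the project's actual text):

def errmsg():
    # hypothetical usage message matching the argc checks above
    print "usage: <script> nickname [data file stub] [commands, e.g. s1_s2_s3_s4]"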
Example #9
File: EM.py Project: baturay/RML-AC
    def EMLikelihood(self):
        # LL = sum over i,k of L(i,k) * z(i,k), where z(i,k) = 1 if point i is
        # assigned to cluster k (the argmax of its responsibilities), else 0
        nData = len(self.mData.data)
        LL = 0
        membership = np.ravel(self.mGammas.argmax(1).T)
        for i in range(nData):
            for k in range(len(self.lCenters)):
                if membership[i] == k:
                    LL += self.mLikelihood_il[i,k]

        self.dEMLikelihood = LL
        return LL

    # give the class labels of each data item
    def Membership(self):
        return np.ravel(self.mGammas.argmax(1).T)

def printDims(v, textv):
    print "dims ", textv, np.size(v,0), np.size(v,1)

def printDim(v, textv):
    print "dim ", textv, np.size(v,0)

if __name__ == "__main__":
    import cData
    D = cData.cData("data/winenorm3_pyre.txt")
    M = cEM(D)
    M.EM(3)
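The double loop in EMLikelihood only adds the single term per point where membership[i] == k, so it collapses to one indexed sum; a vectorized sketch, assuming mLikelihood_il and mGammas behave like 2-D numpy arrays:

import numpy as np

def hard_assignment_likelihood(mLikelihood_il, mGammas):
    # membership[i] = cluster with the largest responsibility for point i
    membership = np.ravel(np.asarray(mGammas).argmax(1))
    L = np.asarray(mLikelihood_il)
    # sum L[i, membership[i]] over all points
    return L[np.arange(L.shape[0]), membership].sum()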