def parseCommandLine(D, argv):
    """Build the starting EM object described by the command-line arguments.

    With two arguments (program name and data file) a fresh ``cEM`` is
    run, refined through ``Starts.starts().goodInitial`` and pickled under
    ``pickles/`` before EM is run once more.  With three arguments the EM
    object is instead loaded from the pickle file named by ``argv[2]``.

    Args:
        D: Data set object; ``len(D.classlist)`` is passed to ``EM``.
        argv: Command-line argument list.  ``argv[1]`` is the data file
            path (only its last ``/`` component is used to name the
            pickle); the optional ``argv[2]`` is a pickle file to load.

    Returns:
        The EM object after the final ``EM`` run.

    Raises:
        SystemExit: With status 1 when ``argv`` does not hold 2 or 3 items.
    """
    if len(argv) not in (2, 3):
        print("Error - usage is " + argv[0] + " <data_file> <startingdatapoints> ")
        sys.exit(1)

    RP = RepPoints.RepPoints()
    EmAlg = cEM(D)
    numClasses = len(D.classlist)

    if len(argv) > 2:
        # SECURITY: unpickling executes arbitrary code embedded in the file;
        # only pass pickle files that come from a trusted source.
        # Pickle data is bytes, so the file must be opened in binary mode.
        with open(argv[2], "rb") as f:
            EmAlg = pickle.load(f)
        EmAlg.EM(numClasses)
        EmAlg.bPPC = True
        emclusts = RP.createClusters(EmAlg)
        RP.repPoints(EmAlg, emclusts)
    else:
        EmAlg.EM(numClasses)
        EmAlg.bPPC = True
        # Creates clusters depending on what EM guessed.
        emclusts = RP.createClusters(EmAlg)
        # Finds the outer points and the midpoints and assigns them in emclusts.
        RP.repPoints(EmAlg, emclusts)
        # This makes the algorithm start with good initial points.
        starter = Starts.starts()
        EmAlg = starter.goodInitial(D, EmAlg, emclusts, RP, 0)
        # Save the EM object before the final EM run; the context manager
        # closes the file even if pickling fails.
        picklePath = "pickles/" + argv[1].split('/')[-1] + "pickle"
        with open(picklePath, "wb") as f:
            pickle.dump(EmAlg, f)
        # NOTE(review): the original indentation was lost; this final EM run
        # is assumed to belong to the no-pickle branch only -- confirm.
        EmAlg.EM(numClasses)

    return EmAlg
def scenario2(D, EMStarts, sFileStub):
    """Run ``goodInitial`` for every starter option and outer trial.

    Progress lines are written to ``sFileStub + ".scen2.results"``, and the
    same open file is handed to ``goodInitial`` as its last argument.

    Args:
        D: Data set object forwarded to ``goodInitial``.
        EMStarts: Sequence of starting EM objects indexed by outer trial;
            each one is deep-copied before use, so the originals are not
            modified here.
        sFileStub: Path prefix of the results file.

    Returns:
        A dict mapping each option name (``StarterOptions.lOptionNames``)
        to the list of EM objects returned by ``goodInitial``, one per
        outer trial.
    """
    dEMResults = {}  # from option string to list of EMs
    # The context manager closes the results file even when a trial raises.
    with open(sFileStub + ".scen2.results", "w") as f:
        f.write("queries,cons,likelihood,NMI\n")
        for option in StarterOptions.lOptions:
            # NOTE(review): `option` is only used to look up its name here;
            # nothing visible passes it to the starter -- confirm it is
            # applied elsewhere.
            sOptionName = StarterOptions.lOptionNames[option]
            EMOptionResults = []
            f.write(sOptionName + "\n")
            for iOutertrial in range(numOuterTrials):
                f.write("outertrial: %d\n" % iOutertrial)
                f.flush()
                # The inner-restart loop (numInnerTrials) is disabled, so
                # goodInitial runs once per outer trial.
                # Get the starting EM for this trial.
                em = copy.deepcopy(EMStarts[iOutertrial])
                em.bPPC = True
                # Set up and run goodInitial.
                RP = RepPoints.RepPoints()
                emclusts = RP.createClusters(em)
                RP.repPoints(em, emclusts)
                starter = Starts.starts()
                newEM = starter.goodInitial(D, em, emclusts, RP, f)
                EMOptionResults.append(newEM)
            dEMResults[sOptionName] = EMOptionResults
    return dEMResults