def DP_model(X, n_politicians, n_parties, data_desc, a_beta, b_beta, theta,
             gibbs_sweeps, burn_in):
    start_time = time.time()
    if theta == 0:
        # Get theta using Newton's method
        theta = DP.crp_parameters(n_politicians, n_parties, 10000)
        print("Got CRP parameters\n")
    model = "DP-Beta-Bernoulli"
    params = {'a': a_beta, 'b': b_beta, 'theta': theta, 'T': gibbs_sweeps}
    # Compute Z matrices
    Z = DP.beta_bernoulli_irm(X=X, **params)
    print("computed Z matrices\n")
    # Compute co-clustering matrix
    co_clust_matrix, avg_n_clusters = co_clustering_matrix(
        Z, burn_in_factor=burn_in)
    print("computed co-clustering matrix\n")
    print(f"average number of clusters {avg_n_clusters}\n")
    # Get point estimate
    labels = point_estimate(co_clust_matrix, avg_n_clusters)
    final_time = time.time()
    diff_time = final_time - start_time
    build_output(data_desc, model, params, co_clust_matrix, avg_n_clusters,
                 labels, diff_time)
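A minimal usage sketch for DP_model, assuming the DP module and the helper functions it calls (co_clustering_matrix, point_estimate, build_output) are importable as in the module above; the vote matrix and hyperparameter values below are illustrative assumptions, not values from the original project:

import numpy as np

# Hypothetical 50-politician roll-call matrix of binary votes.
X_votes = np.random.binomial(1, 0.5, size=(50, 100))
# theta=0 asks DP_model to estimate the CRP concentration itself.
DP_model(X_votes, n_politicians=50, n_parties=5, data_desc="synthetic votes",
         a_beta=1.0, b_beta=1.0, theta=0, gibbs_sweeps=500, burn_in=0.5)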
def loaddata():
    print('loading the data.....')
    # Build the label vocabulary
    lab_v = []
    lab_v.append('negative')
    lab_v.append('positive')
    # Process the training data, including building the data vocabulary
    t_data_list_node, data_v = dp.datadeal('data/raw.clean.train',
                                           is_traindata=True)
    # Process the dev data
    d_data_list_node = dp.datadeal('data/raw.clean.dev', is_traindata=False)
    # Process the test data
    test_data_node = dp.datadeal('data/raw.clean.test', is_traindata=False)
    if args.out_word_v:
        args.word_embed = out_word_vec.add_word_v(data_v)
    args.embed_num = len(data_v)
    args.class_num = len(lab_v)
    print("\nParameters:")
    for attr, value in sorted(args.__dict__.items()):
        if attr == 'word_embed':
            continue
        print("\t{}={}".format(attr.upper(), value))
    return t_data_list_node, data_v, d_data_list_node, lab_v
def randomReconWrapper(dirName, D, T, L, numSamples, typeGen):
    """Takes in a directory of newick files, dirName, duplication, transfer,
    and loss costs, the number of desired random reconciliations, and the
    type of generator (biased or uniform), and calls those random generators
    to build a file containing the number of temporal inconsistencies found
    in those randomly generated reconciliations as well as other information
    relating to the file"""
    totalTimeTravel = 0  # To record total number of time travels in directory
    outOf = 0            # To record total number of reconciliations made
    # loop through files in directory
    for fileName in os.listdir(dirName):
        if fileName.endswith('.newick'):
            f = open(fileName[:-7] + '.txt', 'w')
            f.write(typeGen + " random reconciliations" + "\n")
            hostTree, parasiteTree, phi = newickFormatReader.getInput(
                dirName + "/" + fileName)
            # find size of parasite and host trees
            parasiteSize = len(parasiteTree) + 1
            hostSize = len(hostTree) + 1
            DTLReconGraph, numRecon = DP.DP(hostTree, parasiteTree, phi,
                                            D, T, L)
            rootList = rootGenerator(DTLReconGraph, parasiteTree)
            randomReconList = []
            for n in range(numSamples):
                startRoot = random.choice(rootList)
                if typeGen == "uniform":
                    currentRecon = uniformRecon(DTLReconGraph, [startRoot], {})
                else:
                    normalizeDTL = normalizer(DTLReconGraph)
                    currentRecon = biasedRecon(normalizeDTL, [startRoot], {})
                for key in currentRecon.keys():
                    currentRecon[key] = currentRecon[key][:-1]
                randomReconList.append(currentRecon)
            # make sure there are no duplicate reconciliations
            uniqueReconList = []
            for recon in randomReconList:
                if recon not in uniqueReconList:
                    uniqueReconList.append(recon)
            outOf += len(uniqueReconList)
            timeTravelCount = 0
            for recon in uniqueReconList:
                graph = reconciliationGraph.buildReconstruction(
                    hostTree, parasiteTree, recon)
                currentOrder = orderGraph.date(graph)
                numTrans = findTransfers(recon)
                if currentOrder == 'timeTravel':
                    f.write("Temporal Inconsistency, reconciliation has " +
                            str(numTrans) + " transfers" + "\n")
                    timeTravelCount += 1
                    totalTimeTravel += 1
                else:
                    f.write("No temporal inconsistencies, reconciliation has " +
                            str(numTrans) + " transfers" + "\n")
            f.write(fileName + " contains " + str(timeTravelCount) +
                    " temporal inconsistencies out of " +
                    str(len(uniqueReconList)) + " reconciliations." + "\n" +
                    "Total number of reconciliations: " + str(numRecon) +
                    "\n" + "Host tree size: " + str(hostSize) + "\n" +
                    "Parasite tree size: " + str(parasiteSize) + "\n")
            f.close()
    print "Total fraction of temporal inconsistencies in directory: ", \
        totalTimeTravel, '/', outOf
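A hedged example of driving randomReconWrapper; the directory name and cost values below are placeholders, and the function expects the supporting modules used above (newickFormatReader, DP, orderGraph, reconciliationGraph) plus the generator helpers to be on the path:

# Sample 100 uniform random reconciliations per .newick file in ./trees,
# with duplication=2, transfer=3, loss=1 (illustrative costs only).
randomReconWrapper("trees", 2, 3, 1, 100, "uniform")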
def getCostscapeDTLs(DTLPairs, hostTree, parasiteTree, phi):
    """This function takes as input DTLPairs, a list of tuples with T and L
    costs, and the hostTree, parasiteTree, and phi. It returns a list of
    DTLs whose scores are computed with the T and L values from each element
    in DTLPairs."""
    DTLList = []
    for i in DTLPairs:
        newDTL = DP(hostTree, parasiteTree, phi, 1, i[0], i[1])[0]
        DTLList.append(newDTL)
    return DTLList
def plotTrain(dataset, catMus, catVariances,
              durationRange=np.linspace(-dimensionRange, dimensionRange, 1000),
              colour="#0000FF", xBottom=0, alpha=0.5):
    maxHeight = 0.
    # ff.niceGraph()
    for c in range(len(catMus)):
        mu = catMus[c]
        sd = np.sqrt(catVariances[c])
        density = normList(NORM.pdf(durationRange, loc=mu, scale=sd))
        maxDensity = max(density)
        if maxDensity > maxHeight:
            maxHeight = DP.dc(maxDensity)
        pl.fill_between(durationRange, xBottom, density + xBottom,
                        alpha=alpha, color=".35")  # color="#FF1493"
        pl.text(durationRange[np.where(density == max(density))[0] - 2],
                maxDensity + xBottom + .001, r'$\mu_%d$' % c, fontsize=10)
def sampleWang(X, cp, parameters=modelParameters):
    Z = list(np.zeros(len(X)))
    sampler = DP.sampleDPMM(X, Z, cp,
                            DP.gaussianMarginalLikelihood,
                            DP.gaussianMAPPostPred,
                            parameters,
                            iterations=2000, burn=1000, thin=5,
                            cpSampler=DP.sampleCP)
    posteriorSamples = sampler.wangSUGS()
    return posteriorSamples
def sampleClusters(X, cp):
    Z = list(np.zeros(len(X)))
    sampler = DP.sampleDPMM(X, Z, cp,
                            DP.gaussianMarginalLikelihood,
                            DP.gaussianPostPred,
                            modelParameters,
                            iterations=2000, burn=1000, thin=5,
                            cpSampler=DP.sampleCP)
    posteriorSamples = sampler.gibbs()
    return posteriorSamples
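A small sketch of how the two samplers above might be driven, assuming DP exposes the listed likelihood and predictive functions and that modelParameters is defined as in the surrounding module; the synthetic data is illustrative:

import numpy as np

# Two well-separated Gaussian clusters (hypothetical test data).
X_test = list(np.concatenate([np.random.normal(-3.0, 1.0, 50),
                              np.random.normal(3.0, 1.0, 50)]))
gibbsSamples = sampleClusters(X_test, cp=10.)  # full Gibbs posterior samples
sugsSamples = sampleWang(X_test, cp=10.)       # Wang-style SUGS MAP path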
def newScoreWrapper(newickFile, switchLo, switchHi, lossLo, lossHi, D, T, L):
    """This function takes as input a newick file, switch and loss cost
    ranges, and duplication cost D, transfer cost T, and loss cost L, and
    returns the newDTL whose scores were calculated from costscape."""
    H, P, phi = newickFormatReader.getInput(newickFile)
    originalDTL, numRecon, leaves = DP(H, P, phi, D, T, L)
    pointList = findCenters(newickFile, switchLo, switchHi, lossLo, lossHi)
    DTLPairs = getDTLVals(pointList)
    DTLList = getCostscapeDTLs(DTLPairs, H, P, phi)
    newDTL = changeDTLScores(originalDTL, DTLList)
    return newDTL, numRecon, leaves
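newScoreWrapper ties getCostscapeDTLs and the costscape helpers together; a hedged call, with placeholder file name and ranges, might look like:

newDTL, numRecon, leaves = newScoreWrapper("example.newick",
                                           switchLo=0.5, switchHi=1.5,
                                           lossLo=0.5, lossHi=1.5,
                                           D=2, T=3, L=1)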
def main():
    # read the option from the first argument
    option = sys.argv[1]
    # Data preprocessing
    DP.train()
    DP.dev()
    DP.test()
    if option == '0':
        # Make top-2000 CTF vectors
        CTF.train()
        CTF.dev()
        CTF.test()
        # train result
        test_RMLR.CTF_final()
    elif option == '1':
        # Make top-2000 DF vectors
        DF.train()
        DF.dev()
        DF.test()
        # train result
        test_RMLR.DF_final()
def run_test(fileName, max_k):
    cache_dir = './cache'
    D = 2.
    T = 3.
    L = 1.
    host, paras, phi = newickFormatReader.getInput(fileName)
    if not os.path.exists(cache_dir):
        os.makedirs(cache_dir)
        f = open('%s/README' % cache_dir, 'w')
        f.write('This directory holds a cache of reconciliation graphs '
                'for the TreeLife data set')
        f.close()
    cache_location = '%s/%s.graph' % (cache_dir, os.path.split(fileName)[1])
    if not os.path.isfile(cache_location):
        print >> sys.stderr, 'A reconciliation graph has not been built yet for this newick file'
        print >> sys.stderr, 'Doing so now and caching it in {%s}...' % cache_location
        DictGraph, numRecon = DP.DP(host, paras, phi, D, T, L)
        f = open(cache_location, 'w+')
        f.write(repr(DictGraph))
        f.close()
    print >> sys.stderr, 'Loading reconciliation graph from cache'
    f = open(cache_location)
    DictGraph = eval(f.read())
    f.close()
    scoresList, dictReps = Greedy.Greedy(DictGraph, paras)
    print >> sys.stderr, 'Found cluster representatives using point-collecting'
    graph = ReconGraph.ReconGraph(DictGraph)
    setReps = [ReconGraph.dictRecToSetRec(graph, dictRep)
               for dictRep in dictReps]
    random.seed(0)
    extra_reps = [KMeans.get_template(graph) for i in xrange(max_k)]
    representatives = setReps + extra_reps
    print >> sys.stderr, 'Starting K Means algorithm ... '
    print >> sys.stderr, 'Printing Average and Maximum cluster radius at each step'
    for i in xrange(1, max_k + 1):
        print 'k = %d' % i
        KMeans.k_means(graph, 10, i, 0, representatives[:i])
def loaddata(args):
    nd = node.Node()
    dp = DP.DataPrecess()
    train_data_voc, train_data_sentence_num = dp.buildvocab(
        path="raw.clean.train")  # type: list
    label_voc = dp.buildvocab_label(path="classfication")
    train_data, train_lable = dp.readdata_d(path="raw.clean.train",
                                            shuffle=args.shuffle)
    dev_data, dev_lable = dp.readdata_d(path="raw.clean.dev",
                                        shuffle=args.shuffle)
    unused, dev_sentence_num = dp.readdata_v(path="raw.clean.dev")
    print('train_sentence:', train_data_sentence_num,
          'dev_sentence:', dev_sentence_num)
    nd.set_dev_data(dev_data)
    nd.set_dev_lable(dev_lable)
    nd.set_dev_sentence_num(dev_sentence_num)
    nd.set_train_data(train_data)
    nd.set_train_data_sentence_num(train_data_sentence_num)
    nd.set_train_lable(train_lable)
    nd.set_train_data_voc(train_data_voc)
    nd.set_label_voc(label_voc)
    if args.out_word_v is True:
        args.word_embed = add_word_v(train_data_voc)
        for i in range(len(args.word_embed)):
            if len(args.word_embed[i]) != 300:
                print('less 300')
    args.embed_num = len(train_data_voc)
    args.class_num = len(label_voc)
    args.cuda = (not args.no_cuda) and torch.cuda.is_available()
    del args.no_cuda
    args.kernel_sizes = [int(k) for k in args.kernel_sizes.split(',')]
    args.save_dir = os.path.join(
        args.save_dir, datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))
    print("\nParameters:")
    for attr, value in sorted(args.__dict__.items()):
        if attr == 'word_embed':
            continue
        print("\t{}={}".format(attr.upper(), value))
    return nd
def getCostscapeDTLReconGraphs(DTLReconGraphPairs, hostTree, parasiteTree,
                               phi):
    """This function takes as input DTLReconGraphPairs, a list of tuples
    with transfer and loss costs, and the hostTree, parasiteTree, and phi.
    It returns a list of DTLReconGraphs whose scores are computed with the
    transfer and loss values from each element in DTLReconGraphPairs."""
    DTLReconGraphList = []
    for cost in DTLReconGraphPairs:
        # assign those associated costs to the newDTLReconGraph
        newDTLReconGraph = DP(hostTree, parasiteTree, phi, 1, cost[0],
                              cost[1])[0]
        DTLReconGraphList.append(newDTLReconGraph)
    return DTLReconGraphList
def makeInferenceForPlotting(N, cp, D):
    """grab MAP inferences under WANG"""
    inference = sampleWang(D, cp)
    nCats = len(set(inference[1]))
    MAPMus, MAPSDs = [], []
    for i in range(nCats):
        datasetCat = []
        for j in range(len(inference[0])):
            if inference[1][j] == i:
                datasetCat.append(inference[0][j])
        muCat, sigmaCat = DP.getMAP(datasetCat, paramsDict=modelParameters)
        MAPMus.append(muCat)
        MAPSDs.append(sigmaCat)
    return D, MAPMus, MAPSDs
def main(argv):
    try:
        if argv[1] == 'DP':
            value = DP.DPMethod(argv[2])
        elif argv[1] == 'DQN':
            # InFile, batch_size, buffer_size, episodes_train,
            # episodes_test, startTime
            value = DQN.DQNMethod(argv[2], int(argv[3]), int(argv[4]),
                                  int(argv[5]), int(argv[6]),
                                  time.process_time())
        else:
            raise Exception
        print('value = {}'.format(value))
    except:
        utils.printErrorAndExit('main')
def unitScoreDTL(hostTree, parasiteTree, phi, D, T, L):
    """Takes a hostTree, parasiteTree, tip mapping function phi, and
    duplication cost (D), transfer cost (T), and loss cost (L) and returns
    the DTL graph in the form of a dictionary, with event scores set to 1.
    Cospeciation is assumed to cost 0."""
    DTLReconGraph, numRecon = DP.DP(hostTree, parasiteTree, phi, D, T, L)
    newDTL = {}
    for vertex in DTLReconGraph:
        newDTL[vertex] = []
        for event in DTLReconGraph[vertex][:-1]:
            newEvent = event[:-1] + [1.0]
            newDTL[vertex].append(newEvent)
        newDTL[vertex].append(DTLReconGraph[vertex][-1])
    return newDTL
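A brief usage sketch for unitScoreDTL, assuming newickFormatReader and DP are importable and that a tree file exists at the illustrative path below:

host, paras, phi = newickFormatReader.getInput("example.newick")
# Unit-score the graph: every event gets weight 1.0, cospeciation costs 0.
unitDTL = unitScoreDTL(host, paras, phi, 2, 3, 1)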
def gethourdata(name):
    cursor = readarchiveddata(name, 'hour').limit(10)
    p = {}
    index = 0
    n0 = {}
    for i in cursor:
        n = str(time.localtime(i['hourtime'] * 3600).tm_hour)
        if n not in n0:
            n0[n] = index
            index = index + 1
        temp = n0[n]
        for j in config.INFOS:
            if j not in p:
                p[j] = {}
            p[j][n] = [temp * 11, DP.percentify(name, j, i[j])]
    return p
def DQNMethod(InFile, batch_size, buffer_size, episodes_train,
              episodes_test, startTime):
    parameterDP, pdf, rf, demand = DP.readData(InFile)
    global T, N
    T = parameterDP.T
    N = parameterDP.N
    env = Environment(parameterDP, pdf, rf, demand)
    replayBuffer = ReplayBuffer(buffer_size)
    print('Training ...')
    train(env, nets, replayBuffer, batch_size, episodes_train,
          episodes_test, startTime)
    print('Testing ...')
    return test(env, nets, episodes_test, 0)
def getdaydata(name):
    cursor = readarchiveddata(name, 'day').limit(10)
    p = {}
    index = 0
    n0 = {}
    for i in cursor:
        a = time.localtime(i['daytime'] * 3600 * 24)
        n = str((a.tm_mon, a.tm_mday))[1:-1]
        if n not in n0:
            n0[n] = index
            index = index + 1
        temp = n0[n]
        for j in config.INFOS:
            if j not in p:
                p[j] = {}
            # use the deduplicated index for this key, matching gethourdata
            p[j][n] = [temp * 11, DP.percentify(name, j, i[j])]
    return p
def Reconcile(argList):
    """Takes command-line arguments of a .newick file, duplication, transfer,
    and loss costs, the type of scoring desired and possible switch and loss
    ranges. Creates files for the host, parasite, and reconciliations"""
    fileName = argList[1]   # .newick file
    D = float(argList[2])   # Duplication cost
    T = float(argList[3])   # Transfer cost
    L = float(argList[4])   # Loss cost
    freqType = argList[5]   # Frequency type
    # Optional inputs if freqType == xscape
    switchLo = float(argList[6])  # Switch lower boundary
    switchHi = float(argList[7])  # Switch upper boundary
    lossLo = float(argList[8])    # Loss lower boundary
    lossHi = float(argList[9])    # Loss upper boundary
    host, paras, phi = newickFormatReader.getInput(fileName)
    hostRoot = cycleCheckingGraph.findRoot(host)
    hostv = cycleCheckingGraph.treeFormat(host)
    Order = orderGraph.date(hostv)
    # Default scoring function (if freqType == "Frequency" scoring)
    DTLReconGraph, numRecon = DP.DP(host, paras, phi, D, T, L)
    print DTLReconGraph, numRecon
    # uses xscape scoring function
    if freqType == "xscape":
        DTLReconGraph = calcCostscapeScore.newScoreWrapper(
            fileName, switchLo, switchHi, lossLo, lossHi, D, T, L)
    # uses unit scoring function
    elif freqType == "unit":
        DTLReconGraph = unitScoreDTL(host, paras, phi, D, T, L)
    DTLGraph = copy.deepcopy(DTLReconGraph)
    scoresList, rec = Greedy.Greedy(DTLGraph, paras)
    for n in range(len(rec)):
        graph = cycleCheckingGraph.buildReconciliation(host, paras, rec[n])
        currentOrder = orderGraph.date(graph)
        if currentOrder == "timeTravel":
            rec[n], currentOrder = detectCycles.detectCyclesWrapper(
                host, paras, rec[n])
            currentOrder = orderGraph.date(currentOrder)
        hostOrder = hOrder(hostv, currentOrder)
        hostBranchs = branch(hostv, hostOrder)
        if n == 0:
            newickToVis.convert(fileName, hostBranchs, n, 1)
        else:
            newickToVis.convert(fileName, hostBranchs, n, 0)
        # fileName[:-7] is the file name minus the .newick
        reconConversion.convert(rec[n], DTLReconGraph, paras,
                                fileName[:-7], n)
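Reconcile is driven by an argv-style list; a hedged invocation might look like the following, where the file name and costs are placeholders (indices 6 through 9 are only meaningful when freqType is "xscape"):

# sys.argv-shaped list: script name, newick file, D, T, L, frequency type,
# then the xscape switch/loss ranges.
Reconcile(["MasterReconciliation.py", "example.newick", "2", "3", "1",
           "xscape", "0.5", "1.5", "0.5", "1.5"])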
def freqSummation(argList):
    """Takes as input an argument list containing a newick file of host and
    parasite trees as well as their phi mapping, duplication, transfer, and
    loss costs, the type of frequency scoring to be used, as well as switch
    and loss cost ranges for xscape scoring, and returns a file containing
    the list of scores for each individual reconciliation, the sum of those
    scores, the total cost of those reconciliations and the number of
    reconciliations of those trees."""
    newickFile = argList[1]
    D = float(argList[2])
    T = float(argList[3])
    L = float(argList[4])
    freqType = argList[5]
    switchLo = float(argList[6])
    switchHi = float(argList[7])
    lossLo = float(argList[8])
    lossHi = float(argList[9])
    fileName = newickFile[:-7]
    f = open(fileName + "freqFile.txt", 'w')
    host, paras, phi = newickFormatReader.getInput(newickFile)
    DTL, numRecon = DP.DP(host, paras, phi, D, T, L)
    if freqType == "Frequency":
        newDTL = DTL
    elif freqType == "xscape":
        newDTL = calcCostscapeScore.newScoreWrapper(newickFile, switchLo,
                                                    switchHi, lossLo,
                                                    lossHi, D, T, L)
    elif freqType == "unit":
        newDTL = MasterReconciliation.unitScoreDTL(host, paras, phi, D, T, L)
    scoresList, reconciliation = Greedy.Greedy(newDTL, paras)
    totalSum = 0
    for score in scoresList:
        totalSum += score
    for index in reconciliation:
        totalCost = 0
        for key in index:
            if index[key][0] == "L":
                totalCost += L
            elif index[key][0] == "T":
                totalCost += T
            elif index[key][0] == "D":
                totalCost += D
    f.write(str(scoresList) + '\n')
    f.write(str(totalSum) + '\n')
    f.write(str(totalCost) + '\n')
    f.write(str(numRecon))
    f.close()
def IL(startMuA, startMuB, startSDA, startSDB, gens, N, cp=10.,
       parameters=modelParameters):
    D = sampleData(mus=[startMuA, startMuB],
                   variances=[startSDA, startSDB], N=N)
    nCatData, musData, SDsData = [], [], []
    for g in range(gens):
        inference = sampleWang(D, cp, parameters)
        # print "Inference:", inference
        nCats = len(set(inference[1]))
        nCatData.append(nCats)
        thisGenMus, thisGenSDs = [], []
        for i in range(nCats):
            datasetCat = []
            for j in range(len(inference[0])):
                if inference[1][j] == i:
                    datasetCat.append(inference[0][j])
            muCat, sigmaCat = DP.getMAP(datasetCat, parameters)
            thisGenMus.append(muCat)
            thisGenSDs.append(sigmaCat)
        musData.append(thisGenMus)
        SDsData.append(thisGenSDs)
        D = sampleData(mus=thisGenMus, variances=thisGenSDs, N=N)
    return nCatData, musData, SDsData
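A sketch of running the iterated-learning loop above; the start parameters and generation count are illustrative assumptions, and sampleData, sampleWang, and modelParameters are expected from the surrounding module:

# Ten generations of iterated learning, starting from two categories.
nCats, mus, sds = IL(startMuA=-2.0, startMuB=2.0, startSDA=1.0,
                     startSDB=1.0, gens=10, N=100)
print nCats  # number of inferred categories per generation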
def Reconcile(argList):
    """Takes command-line arguments of a .newick file, duplication, transfer,
    and loss costs, the type of scoring desired and possible switch and loss
    ranges. Creates files for the host, parasite, and reconciliations"""
    fileName = argList[1]   # .newick file
    D = float(argList[2])   # Duplication cost
    T = float(argList[3])   # Transfer cost
    L = float(argList[4])   # Loss cost
    freqType = argList[5]   # Frequency type
    # Optional inputs if freqType == xscape
    switchLo = float(argList[6])  # Switch lower boundary
    switchHi = float(argList[7])  # Switch upper boundary
    lossLo = float(argList[8])    # Loss lower boundary
    lossHi = float(argList[9])    # Loss upper boundary
    host, paras, phi = newickFormatReader.getInput(fileName)
    hostRoot = ReconciliationGraph.findRoot(host)
    # Default scoring function (if freqType == "Frequency" scoring)
    DTLReconGraph, numRecon = DP.DP(host, paras, phi, D, T, L)
    # uses xscape scoring function
    # if freqType == "xscape":
    #     DTLReconGraph = calcCostscapeScore.newScoreWrapper(
    #         fileName, switchLo, switchHi, lossLo, lossHi, D, T, L)
    # uses unit scoring function
    if freqType == "unit":
        DTLReconGraph = unitScoreDTL(host, paras, phi, D, T, L)
    DTLGraph = copy.deepcopy(DTLReconGraph)
    scoresList, recs = Greedy.Greedy(DTLGraph, paras)
    infeasible_recs = []
    for rec in recs:
        if orderGraph.date(ReconciliationGraph.buildReconciliation(
                host, paras, rec)) == False:
            infeasible_recs.append(rec)
    return infeasible_recs, recs
def setofaction(self, t, current_state):
    return DP.setofaction(t, self.parameterDP, current_state)
import DP
import numpy as np
import sys

if __name__ == "__main__":
    map = np.loadtxt("../testcase/case" + sys.argv[1] + ".txt")
    terminal = np.loadtxt("../testcase/terminal" + sys.argv[1] + ".txt",
                          dtype='i')
    size_row, size_col = map.shape
    it = iter(terminal)
    terminal = list(zip(it, it))
    dp = DP.DP(size_row, size_col, map, terminal, -1)
    if sys.argv[2] == "pi":
        count = dp.policyIteration()
        print("Stopped after " + str(count) + " iterations of policy iteration")
    else:
        count = dp.valueIteration()
        print("Stopped after " + str(count) + " iterations of value iteration")
    dp.plot(sys.argv[1], sys.argv[2])
def HVC(X, p):
    n = len(X)
    l = n / p
    d = len(X[0])
    clusters = []
    for i in range(1, d):
        s = sorted(X, key=lambda x: x[i])
        c = dp.cluster1D(s, len(s), i, p)
        clusters.append(c)
    # build relationship matrix
    # empty matrix [n, n]
    F = np.zeros((n, n))
    # for each 1-d clustering
    for y in clusters:
        idx = 0
        # take each cluster, with factor higher for earlier clusters
        while idx < len(y):
            c1 = y[idx]
            factor = len(y) - math.pow(6, idx)
            if factor < 0:
                factor = 0
            # and for each pair in the cluster
            i1 = 0
            while i1 < len(c1) - 1:
                i2 = i1 + 1
                while i2 < len(c1):
                    # add the factor of the cluster to their partnership F[x, q]
                    F[c1[i1][0], c1[i2][0]] += factor / (len(clusters) * p)
                    F[c1[i2][0], c1[i1][0]] += factor / (len(clusters) * p)
                    i2 += 1
                i1 += 1
            idx += 1
    print F
    X = sorted(X, key=lambda student: student[0])
    clusters = list()
    while len(clusters) < l:
        clusters.append(list())
    first_round = True
    while len(clusters[l - 1]) < p:
        # print "next round"
        for i in clusters:
            if first_round:
                while len(i) < 2:
                    next_idx = rand.randint(0, n - 1)
                    if F[next_idx, 0] != -1:
                        i.append(X[next_idx])
                        F[next_idx, :] = -1
            else:
                maxIdx = 0
                maxVal = 0
                index = 0
                while index < min(len(i), 2):
                    test = i[index][0]
                    for j in range(0, len(F[0])):
                        # if possIdx[1] == test:
                        #     # F[possIdx[1], test] = -1
                        #     possIdx = np.unravel_index(np.argmax(F[:, test]), (n, n))
                        possMax = F[j, test]
                        if possMax >= maxVal:
                            maxIdx = j
                            maxVal = possMax
                    index += 1
                i.append(X[maxIdx])
                F[maxIdx, :] = -1
        first_round = False
    return clusters
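A hedged driver for HVC, assuming dp.cluster1D is available and that each row of X carries an integer id in column 0 followed by feature columns, as the pairing logic above requires; the data below is synthetic:

import random as rand

# 20 hypothetical students: (id, feature1, feature2), grouped into teams of 4.
X_students = [(i, rand.randint(0, 100), rand.randint(0, 100))
              for i in range(20)]
teams = HVC(X_students, p=4)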
    def get_actions(self, state):
        # decode the two-location state (21 values per location)
        num1 = state // 21
        num2 = state % 21
        actions = []
        for a in range(self.action_size):
            moved = a - 5
            if num1 + moved < 0 or num2 - moved < 0:
                continue
            actions.append(a)
        return actions

    def print_evaluation(self):
        print("value matrix")
        for i in range(21):
            for j in range(21):
                state_idx = (i * 21 + j)
                print("{:.3f}".format(self.v[state_idx]), end=' ')
            print()

    def print_improvement(self):
        print("policy matrix")
        for i in range(21):
            for j in range(21):
                state_idx = (i * 21 + j)
                print(self.p[state_idx], end=' ')
            print()


if __name__ == "__main__":
    agent = Agent(21 * 21, 11, 0.9)
    method = DP.algo(agent, 0.0001)
    method.policy_iteration()  # show=True
    def reward(self, state, action):
        if state == 0:
            return 0.0
        else:
            return -1.0

    def get_actions(self, state):
        return [i for i in range(self.action_size)]

    def print_evaluation(self):
        print("value matrix")
        for i in range(4):
            for j in range(4):
                state_idx = (i * 4 + j) % 15
                print("{:.3f}".format(self.v[state_idx]), end=' ')
            print()

    def print_improvement(self):
        print("policy matrix")
        for i in range(4):
            for j in range(4):
                state_idx = (i * 4 + j) % 15
                print(self.p[state_idx], end=' ')
            print()


if __name__ == "__main__":
    test = myAgent(15, 4, 1.0)
    method = DP.algo(test, threshold=0.0001)
    method.policy_evaluation(show=True)
    # method.policy_iteration(show=True)
    # method.value_iteration(show=True)
    attributes = list()
    attributes.append(nextId)
    while len(attributes) < d + 1:
        meth = int(rand.normalvariate(normMean, normStdev))  # random number, 0-100
        attributes.append(meth)
    nextStudent = tuple(attributes)
    dataset.append(nextStudent)
    nextId += 1

# sort the dataset by intelligence
randomCopy = cp.deepcopy(dataset)
dataset = sorted(dataset, key=lambda student: student[1])
originalCopy = cp.deepcopy(dataset)
l1 = DP.cluster1D(dataset, size, 1, p)
clusters = list()
clusters.append(l1)
dim = 2
while dim <= d:
    dataset = sorted(dataset, key=lambda student: student[dim])
    l2 = DP.cluster1D(dataset, size, dim, p)
    clusters.append(l2)
    dim += 1

#####################################
#          Cluster Ranking          #
#                                   #
    # to DP
    # DPParam = DP_fit(z_batch)
    # DPParam = np.ones((batch_size))
    # gamma: 'LPMtx' (batch_size, # of clusters)
    # N : 'Nvec' (# of clusters, )
    # m : 'm' (# of clusters, latent_dim)
    # W : 'B' (# of clusters, latent_dim, latent_dim)
    # v : 'nu' (# of clusters)
    # DPParam = DPObj.fit(z_batch)
    if dataset == 'firstBatch' or dataset == 'secondBatch':
        newinitname = results.initModelPath
        DPObj = DP.DP(output_path=fullOutputPath, initname=newinitname,
                      gamma1=gamma1, gamma0=gamma0, sf=sf, nBatch=nBatch,
                      taskID=taskID)
        DPParam, newinitname = DPObj.fitWithWarmStart(z_batch, newinitname)
    else:
        if epoch == 0 and iteration == 0:
            newinitname = 'randexamples'
            if dataset == 'reuters10k':
                DPObj = DP.DP(output_path=fullOutputPath,
                              initname=newinitname, gamma1=gamma1,
                              gamma0=gamma0, Kmax=Kmax, sf=sf, nBatch=nBatch,
## make full output path
fullOutputPath = createOutputFolderName(outputPath, Kmax, dataset, epoch,
                                        batch_iter, scale, batchsize, rep, sf)
## name log file and write console output to log.txt
logFileName = os.path.join(fullOutputPath, 'log.txt')
if results.logFile:
    sys.stdout = open(logFileName, 'w')

MNIST_df = XData(aa['z'], dtype='auto')

##########################################################
## create a DP object and get DPParam
DPObj = DP.DP(output_path=fullOutputPath, initname='randexamples',
              gamma1=gamma1, gamma0=gamma0, Kmax=Kmax, sf=sf, nLap=nLap,
              taskID=taskID)
## after training the model, get DPParam
DPParam, newinitname = DPObj.fit(aa['z'])

#########################################################
## add evaluation summary metric and save results
#########################################################
## get z_fit from the encoder and fit with the DP model to get the labels
## for all training data
z_fit = aa['z']
fittedY = obtainFittedYFromDP(DPParam, z_fit)

####################################
## Obtain the relationship between fitted class label and true label,
## stored in a dictionary