def randomReconWrapper(dirName, D, T, L, numSamples, typeGen):
    """Sample random reconciliations for every .newick file in a directory.

    For each file in ``dirName`` whose name ends in ``.newick`` the
    reconciliation graph is built with ``DP.DP``, ``numSamples`` random
    reconciliations are drawn from it, duplicates are dropped, and every
    remaining reconciliation is dated with ``orderGraph.date`` to detect
    temporal inconsistencies.  One report per newick file is written to
    ``<name without .newick>.txt`` in the current working directory, and
    the directory-wide totals are printed to stdout.

    Args:
        dirName: directory that is scanned for ``.newick`` files.
        D: duplication cost passed to ``DP.DP``.
        T: transfer cost passed to ``DP.DP``.
        L: loss cost passed to ``DP.DP``.
        numSamples: number of random reconciliations drawn per file.
        typeGen: ``"uniform"`` selects ``uniformRecon``; any other value
            selects ``biasedRecon`` on the normalized graph.  The value is
            also written verbatim as the report header.

    Returns:
        None.
    """
    totalTimeTravel = 0  # temporal inconsistencies over the whole directory
    outOf = 0            # unique reconciliations over the whole directory

    for fileName in os.listdir(dirName):
        if not fileName.endswith('.newick'):
            continue

        # fileName[:-7] strips the '.newick' suffix.  The context manager
        # guarantees the report is closed even if a later step raises.
        with open(fileName[:-7] + '.txt', 'w') as f:
            f.write(typeGen + " random reconciliations" + "\n")
            hostTree, parasiteTree, phi = newickFormatReader.getInput(
                os.path.join(dirName, fileName))

            # find size of parasite and host trees
            parasiteSize = len(parasiteTree) + 1
            hostSize = len(hostTree) + 1

            DTLReconGraph, numRecon = DP.DP(hostTree, parasiteTree, phi,
                                            D, T, L)
            rootList = rootGenerator(DTLReconGraph, parasiteTree)

            # Initialised before sampling so the summary below can still be
            # written when numSamples is 0 (previously a NameError).
            timeTravelCount = 0
            randomReconList = []
            for _ in range(numSamples):
                startRoot = random.choice(rootList)
                if typeGen == "uniform":
                    currentRecon = uniformRecon(DTLReconGraph, [startRoot], {})
                else:
                    # NOTE(review): normalizer runs once per sample, as in
                    # the original; hoist it out of the loop once it is
                    # confirmed to be pure.
                    normalizeDTL = normalizer(DTLReconGraph)
                    currentRecon = biasedRecon(normalizeDTL, [startRoot], {})
                # drop the last element of every entry before comparing
                for key in currentRecon:
                    currentRecon[key] = currentRecon[key][:-1]
                randomReconList.append(currentRecon)

            # make sure there are no duplicate reconciliations
            # (reconciliations are dicts, hence the linear membership test)
            uniqueReconList = []
            for recon in randomReconList:
                if recon not in uniqueReconList:
                    uniqueReconList.append(recon)
            outOf += len(uniqueReconList)

            for recon in uniqueReconList:
                graph = reconciliationGraph.buildReconstruction(
                    hostTree, parasiteTree, recon)
                currentOrder = orderGraph.date(graph)
                numTrans = findTransfers(recon)
                if currentOrder == 'timeTravel':
                    f.write("Temporal Inconsistency, reconciliation has "
                            + str(numTrans) + " transfers" + "\n")
                    timeTravelCount += 1
                    totalTimeTravel += 1
                else:
                    f.write("No temporal inconsistencies, reconciliation has "
                            + str(numTrans) + " transfers" + "\n")

            f.write(fileName + " contains " + str(timeTravelCount)
                    + " temporal " + "inconsistencies out of "
                    + str(len(uniqueReconList)) + " reconciliations." + "\n"
                    + "Total number of reconciliations: " + str(numRecon)
                    + "\n" + "Host tree size: " + str(hostSize) + "\n"
                    + "Parasite tree size: " + str(parasiteSize) + "\n")

    # Single-argument print works as a statement (Python 2) and as a
    # function (Python 3); the double space after the colon reproduces the
    # separator the original comma-separated print statement emitted.
    print("Total fraction of temporal inconsistencies in directory:  %s / %s"
          % (totalTimeTravel, outOf))
def getCostscapeDTLs(DTLPairs, hostTree, parasiteTree, phi):
    """Build one DTL graph per (transfer, loss) cost pair.

    DTLPairs is a list of tuples whose first entry is a transfer cost and
    whose second entry is a loss cost.  For every pair, DP is run on
    hostTree, parasiteTree and phi with a duplication cost of 1, and the
    first item of its result is kept.  The kept items are returned as a
    list in the same order as DTLPairs.
    """
    return [
        DP(hostTree, parasiteTree, phi, 1, costs[0], costs[1])[0]
        for costs in DTLPairs
    ]
def newScoreWrapper(newickFile, switchLo, switchHi, lossLo, lossHi, D, T, L):
    """Re-score the DTL graph of a newick file using costscape centers.

    The host tree, parasite tree and tip mapping are read from newickFile
    and DP is run on them with duplication cost D, transfer cost T and
    loss cost L.  findCenters is then called with the file and the
    switch/loss ranges, its points are turned into cost pairs by
    getDTLVals, and getCostscapeDTLs builds one DTL graph per pair.
    changeDTLScores combines the original graph with that list.

    Returns a tuple (newDTL, numRecon, leaves) where newDTL is the result
    of changeDTLScores and numRecon and leaves are the second and third
    items returned by DP for the original costs.
    """
    hostTree, parasiteTree, tipMapping = newickFormatReader.getInput(newickFile)
    originalDTL, numRecon, leaves = DP(hostTree, parasiteTree, tipMapping,
                                       D, T, L)
    centers = findCenters(newickFile, switchLo, switchHi, lossLo, lossHi)
    costscapeDTLs = getCostscapeDTLs(getDTLVals(centers), hostTree,
                                     parasiteTree, tipMapping)
    return changeDTLScores(originalDTL, costscapeDTLs), numRecon, leaves
def run_test(fileName, max_k):
    """Run k-means over the reconciliations of one newick file for k = 1..max_k.

    The reconciliation graph is built with fixed costs (D=2, T=3, L=1) and
    cached under ``./cache/<basename of fileName>.graph`` as the ``repr``
    of the graph; later runs load the cached text instead of recomputing.
    Representatives found by ``Greedy.Greedy`` are converted with
    ``ReconGraph.dictRecToSetRec`` and extended with ``max_k`` templates
    from ``KMeans.get_template``; ``KMeans.k_means`` is then called once
    per k with the first k representatives.  Progress messages go to
    stderr, the ``k = ...`` headers to stdout.

    Args:
        fileName: path of the newick file to process.
        max_k: largest number of clusters to try.

    Returns:
        None.
    """
    cache_dir = './cache'
    D = 2.
    T = 3.
    L = 1.

    host, paras, phi = newickFormatReader.getInput(fileName)

    if not os.path.exists(cache_dir):
        os.makedirs(cache_dir)
        with open('%s/README' % cache_dir, 'w') as f:
            f.write(
                'This directory holds a cache of reconciliation graph for the TreeLife data set'
            )

    cache_location = '%s/%s.graph' % (cache_dir, os.path.split(fileName)[1])
    if not os.path.isfile(cache_location):
        # sys.stderr.write(... + '\n') emits exactly what the original
        # `print >> sys.stderr, ...` did and is valid on Python 2 and 3.
        sys.stderr.write('A reconciliation graph has not been built yet for this newick file\n')
        sys.stderr.write('Doing so now and caching it in {%s}...\n' % cache_location)
        DictGraph, _ = DP.DP(host, paras, phi, D, T, L)
        with open(cache_location, 'w+') as f:
            f.write(repr(DictGraph))

    sys.stderr.write('Loading reonciliation graph from cache\n')
    with open(cache_location) as f:
        # SECURITY: eval executes whatever the cache file contains.  That is
        # only acceptable while ./cache is written exclusively by this
        # function; never point it at files from an untrusted source.
        DictGraph = eval(f.read())

    scoresList, dictReps = Greedy.Greedy(DictGraph, paras)

    sys.stderr.write('Found cluster representatives using point-collecting\n')

    graph = ReconGraph.ReconGraph(DictGraph)
    setReps = [
        ReconGraph.dictRecToSetRec(graph, dictRep) for dictRep in dictReps
    ]
    # Fixed seed before drawing the templates — presumably get_template
    # uses the random module, which would make runs repeatable; confirm.
    random.seed(0)
    extra_reps = [KMeans.get_template(graph) for _ in range(max_k)]

    representatives = setReps + extra_reps

    sys.stderr.write('Starting K Means algorithm ... \n')
    sys.stderr.write('Printing Average and Maximum cluster radius at each step\n')

    for k in range(1, max_k + 1):
        print('k = %d' % k)
        KMeans.k_means(graph, 10, k, 0, representatives[:k])
def unitScoreDTL(hostTree, parasiteTree, phi, D, T, L):
    """Return the DTL reconciliation graph with every event score set to 1.0.

    DP.DP is run on hostTree, parasiteTree and the tip mapping phi with
    duplication cost D, transfer cost T and loss cost L.  In the returned
    dictionary each vertex maps to a copy of its entry in which the last
    element of every event (all items but the final one) is replaced by
    1.0; the final item of each entry is carried over unchanged.
    """
    reconGraph, _numRecon = DP.DP(hostTree, parasiteTree, phi, D, T, L)
    unitGraph = {}
    for vertex in reconGraph:
        entries = reconGraph[vertex]
        # every item except the trailing one is an event whose last
        # element is overwritten with the unit score
        rescored = [event[:-1] + [1.0] for event in entries[:-1]]
        rescored.append(entries[-1])
        unitGraph[vertex] = rescored
    return unitGraph
def getCostscapeDTLReconGraphs(DTLReconGraphPairs, hostTree, parasiteTree,
                               phi):
    """Compute one DTL reconciliation graph per (transfer, loss) cost pair.

    DTLReconGraphPairs is a list of tuples holding a transfer cost followed
    by a loss cost.  DP is called once per tuple on hostTree, parasiteTree
    and phi, always with a duplication cost of 1, and the first item of
    each result is collected.  The collected graphs are returned as a list
    ordered like DTLReconGraphPairs.
    """
    duplicationCost = 1
    return [
        DP(hostTree, parasiteTree, phi, duplicationCost, pair[0], pair[1])[0]
        for pair in DTLReconGraphPairs
    ]
def freqSummation(argList):
    """Score the reconciliations of a newick file and write a summary file.

    argList layout (index 0 is not read):
        1     path of the newick file; its last 7 characters (".newick")
              are stripped to form the output name
        2-4   duplication, transfer and loss costs
        5     scoring type: "Frequency", "xscape" or "unit"
        6-9   switch low, switch high, loss low, loss high (forwarded to
              the xscape scoring)

    The output file ``<name>freqFile.txt`` receives four lines: the list of
    scores returned by Greedy.Greedy, their sum, the event cost of the last
    reconciliation returned (0 when there is none), and the number of
    reconciliations reported by DP.DP.

    Raises:
        ValueError: if the scoring type is not one of the three above.
    """
    newickFile = argList[1]
    D = float(argList[2])
    T = float(argList[3])
    L = float(argList[4])
    freqType = argList[5]
    switchLo = float(argList[6])
    switchHi = float(argList[7])
    lossLo = float(argList[8])
    lossHi = float(argList[9])

    # Reject an unknown scoring type up front; previously it surfaced as a
    # NameError on newDTL after the output file had already been truncated.
    if freqType not in ("Frequency", "xscape", "unit"):
        raise ValueError("unknown frequency type: %r" % (freqType,))

    fileName = newickFile[:-7]
    host, paras, phi = newickFormatReader.getInput(newickFile)
    DTL, numRecon = DP.DP(host, paras, phi, D, T, L)
    if freqType == "Frequency":
        newDTL = DTL
    elif freqType == "xscape":
        # NOTE(review): whatever newScoreWrapper returns is passed straight
        # to Greedy.Greedy — confirm it returns just the graph.
        newDTL = calcCostscapeScore.newScoreWrapper(
            newickFile, switchLo, switchHi, lossLo, lossHi, D, T, L)
    else:  # "unit"
        newDTL = MasterReconciliation.unitScoreDTL(host, paras, phi, D, T, L)

    scoresList, reconciliation = Greedy.Greedy(newDTL, paras)
    totalSum = sum(scoresList)

    # The cost is restarted for every reconciliation, so the value written
    # is that of the last one.  Initialised here so an empty result writes
    # 0 instead of raising NameError.
    totalCost = 0
    for index in reconciliation:
        totalCost = 0
        for key in index:
            eventType = index[key][0]
            if eventType == "L":
                totalCost += L
            elif eventType == "T":
                totalCost += T
            elif eventType == "D":
                totalCost += D

    # Opened only once all results exist: a failed run no longer clobbers a
    # previous output file, and the handle is closed even if a write fails.
    with open(fileName + "freqFile.txt", 'w') as f:
        f.write(str(scoresList) + '\n')
        f.write(str(totalSum) + '\n')
        f.write(str(totalCost) + '\n')
        f.write(str(numRecon))
def Reconcile(argList):
    """Reconcile the trees of a newick file and pass each result to the converters.

    argList layout (index 0 is not read):
        1     path of the .newick file
        2-4   duplication, transfer and loss costs
        5     scoring type: "xscape" or "unit"; any other value keeps the
              graph returned by DP.DP (default frequency scoring)
        6-9   switch low, switch high, loss low, loss high — read only
              when the scoring type is "xscape"

    For every reconciliation returned by Greedy.Greedy the reconciliation
    graph is dated; if dating reports "timeTravel" the reconciliation is
    replaced by the result of detectCycles.detectCyclesWrapper and dated
    again.  The host branches and the reconciliation are then passed to
    newickToVis.convert and reconConversion.convert (presumably these
    write the host/parasite/reconciliation files — confirm in those
    modules).

    Returns:
        None.
    """
    fileName = argList[1]      # .newick file
    D = float(argList[2])      # Duplication cost
    T = float(argList[3])      # Transfer cost
    L = float(argList[4])      # Loss cost
    freqType = argList[5]      # Frequency type

    # The range arguments are documented as optional, so they are parsed
    # only for xscape scoring; other scoring types no longer fail with
    # IndexError when they are omitted.
    if freqType == "xscape":
        switchLo = float(argList[6])   # Switch lower boundary
        switchHi = float(argList[7])   # Switch upper boundary
        lossLo = float(argList[8])     # Loss lower boundary
        lossHi = float(argList[9])     # Loss upper boundary

    host, paras, phi = newickFormatReader.getInput(fileName)
    # NOTE(review): the results of the next two calls were never used; the
    # calls are kept in case they validate the host tree — confirm, then
    # remove them.
    cycleCheckingGraph.findRoot(host)
    hostv = cycleCheckingGraph.treeFormat(host)
    orderGraph.date(hostv)

    # Default scoring function (if freqType == Frequency scoring)
    DTLReconGraph, numRecon = DP.DP(host, paras, phi, D, T, L)
    # Same text the original `print DTLReconGraph, numRecon` statement
    # produced, in a form that is valid on Python 2 and 3.
    print("%s %s" % (DTLReconGraph, numRecon))

    if freqType == "xscape":
        # uses xScape scoring function
        DTLReconGraph = calcCostscapeScore.newScoreWrapper(
            fileName, switchLo, switchHi, lossLo, lossHi, D, T, L)
    elif freqType == "unit":
        # uses Unit scoring function
        DTLReconGraph = unitScoreDTL(host, paras, phi, D, T, L)

    # Greedy gets its own copy; DTLReconGraph itself is still needed by
    # reconConversion.convert below.
    DTLGraph = copy.deepcopy(DTLReconGraph)
    scoresList, rec = Greedy.Greedy(DTLGraph, paras)

    for n in range(len(rec)):
        graph = cycleCheckingGraph.buildReconciliation(host, paras, rec[n])
        currentOrder = orderGraph.date(graph)
        if currentOrder == "timeTravel":
            # replace the inconsistent reconciliation and date the graph
            # returned alongside it
            rec[n], currentOrder = detectCycles.detectCyclesWrapper(
                host, paras, rec[n])
            currentOrder = orderGraph.date(currentOrder)
        hostOrder = hOrder(hostv, currentOrder)
        hostBranchs = branch(hostv, hostOrder)
        # the last argument is 1 for the first reconciliation, 0 afterwards
        newickToVis.convert(fileName, hostBranchs, n, 1 if n == 0 else 0)
        # fileName[:-7] is the file name minus the .newick
        reconConversion.convert(rec[n], DTLReconGraph, paras,
                                fileName[:-7], n)
def Reconcile(argList):
    """Reconcile the trees of a newick file and report the infeasible results.

    argList layout (index 0 is not read):
        1     path of the .newick file
        2-4   duplication, transfer and loss costs
        5     scoring type: "unit" re-scores the graph with unitScoreDTL;
              any other value (including "xscape", whose scoring is
              disabled here) keeps the graph returned by DP.DP

    Entries 6-9 (switch/loss ranges for xscape scoring) are accepted but
    no longer required, because nothing in this function uses them.

    Returns:
        A tuple ``(infeasible_recs, recs)``: ``recs`` is the list of
        reconciliations returned by Greedy.Greedy and ``infeasible_recs``
        the sub-list for which orderGraph.date returned False.
    """
    fileName = argList[1]      # .newick file
    D = float(argList[2])      # Duplication cost
    T = float(argList[3])      # Transfer cost
    L = float(argList[4])      # Loss cost
    freqType = argList[5]      # Frequency type

    host, paras, phi = newickFormatReader.getInput(fileName)
    # NOTE(review): the root was never used; the call is kept in case it
    # validates the host tree — confirm, then remove it.
    ReconciliationGraph.findRoot(host)

    # Default scoring function (if freqType == Frequency scoring)
    DTLReconGraph, numRecon = DP.DP(host, paras, phi, D, T, L)

    # uses Unit scoring function
    if freqType == "unit":
        DTLReconGraph = unitScoreDTL(host, paras, phi, D, T, L)

    DTLGraph = copy.deepcopy(DTLReconGraph)
    scoresList, recs = Greedy.Greedy(DTLGraph, paras)

    # `== False` is deliberate: a plain truthiness test would also flag any
    # other falsy result of orderGraph.date (e.g. an empty ordering).
    infeasible_recs = [
        rec for rec in recs
        if orderGraph.date(
            ReconciliationGraph.buildReconciliation(host, paras, rec)) == False
    ]
    return infeasible_recs, recs
# to DP # DPParam = DP_fit(z_batch) # DPParam = np.ones((batch_size)) # gamma: 'LPMtx' (batch_size, # of cluster) # N : 'Nvec' (# of cluster, ) # m : 'm' (# of cluster, latent_dim) # W : 'B' (# of cluster, latent_dim, latent_dim) # v: 'nu' (# of cluster) # DPParam = DPObj.fit(z_batch) if dataset == 'firstBatch' or dataset == 'secondBatch': newinitname = results.initModelPath DPObj = DP.DP(output_path=fullOutputPath, initname=newinitname, gamma1=gamma1, gamma0=gamma0, sf=sf, nBatch=nBatch, taskID=taskID) DPParam, newinitname = DPObj.fitWithWarmStart(z_batch, newinitname) else: if epoch == 0 and iteration == 0: newinitname = 'randexamples' if dataset == 'reuters10k': DPObj = DP.DP(output_path=fullOutputPath, initname=newinitname, gamma1=gamma1, gamma0=gamma0, Kmax=Kmax, sf=sf, nBatch=nBatch,
import sys

import numpy as np

import DP


def main(argv):
    """Run policy or value iteration on one test case and plot the result.

    Args:
        argv: command-line arguments.  ``argv[1]`` is the case id used to
            locate ``../testcase/case<id>.txt`` and
            ``../testcase/terminal<id>.txt``; ``argv[2]`` selects the
            algorithm: ``"pi"`` runs policy iteration, any other value
            runs value iteration.
    """
    if len(argv) < 3:
        # Fail with a usage message instead of a bare IndexError.
        sys.exit('usage: %s <case-id> <method>  '
                 '(method "pi" = policy iteration, '
                 'anything else = value iteration)' % argv[0])
    case_id, method = argv[1], argv[2]

    # Named `grid` rather than `map` so the builtin is not shadowed.
    grid = np.loadtxt("../testcase/case" + case_id + ".txt")
    terminal = np.loadtxt("../testcase/terminal" + case_id + ".txt",
                          dtype='i')
    size_row, size_col = grid.shape

    # Pair up consecutive items of the loaded terminal array
    # (presumably row/column coordinates — confirm against the data files).
    it = iter(terminal)
    terminal = list(zip(it, it))

    # NOTE(review): the meaning of the trailing -1 is defined by DP.DP.
    dp = DP.DP(size_row, size_col, grid, terminal, -1)
    if method == "pi":
        count = dp.policyIteration()
        print("Stop after " + str(count) + " iteration of policy iteration")
    else:
        count = dp.valueIteration()
        print("Stop after " + str(count) + " iteration of value iteration")
    dp.plot(case_id, method)


if __name__ == "__main__":
    main(sys.argv)
## make full output path fullOutputPath = createOutputFolderName(outputPath, Kmax, dataset, epoch, batch_iter, scale, batchsize, rep, sf) ## name log file and write console output to log.txt logFileName = os.path.join(fullOutputPath, 'log.txt') if results.logFile: sys.stdout = open(logFileName, 'w') MNIST_df = XData(aa['z'], dtype='auto') ########################################################## ## create a DP object and get DPParam DPObj = DP.DP(output_path=fullOutputPath, initname='randexamples', gamma1=gamma1, gamma0=gamma0, Kmax=Kmax, sf=sf, nLap=nLap, taskID=taskID) DPParam, newinitname = DPObj.fit(aa['z']) ## after training model, get DPParam ######################################################### ## add evaluation summary metric and save results ######################################################### ## get z_fit from the encoder and fit with DP model to get all the labels for all training data z_fit = aa['z'] fittedY = obtainFittedYFromDP(DPParam, z_fit) #################################### ## Obtain the relationship between fittec class lable and true label, stored in a dictionary