def readSimulationData(dataFolder):
    """Collect the real trees and real allele matrices of all simulations.

    ``dataFolder`` is walked recursively; every directory below the root is
    registered as one simulation data set.

    Parameters:
        dataFolder: root folder that holds the simulation sub-folders.

    Returns:
        A tuple ``(trees, alleles, simulationDataIds)``:
        ``trees`` has one Graph per file whose name starts with 'RealTrees',
        ``alleles`` has one string matrix per file whose name starts with
        'RealA', and ``simulationDataIds`` lists the visited sub-folder paths.
    """
    trees = []
    alleles = []
    simulationDataIds = []

    #Walk the folder that was passed in; the previous code walked a name
    #('simulationFolder') that is not defined in this function.
    for subdir, dirs, files in os.walk(dataFolder):
        if subdir == dataFolder:  #we are not interested in the root folder
            continue
        simulationDataIds.append(subdir)

        for file in files:
            #Collect the real tree of this simulation
            if re.match('RealTrees', file):
                stringDict = computeTreeErrorOtherMetrics.collectErrorsFromFile(
                    file, subdir)[0]
                #NOTE(review): eval executes the file content as code; only
                #safe for trusted simulation output.
                tree = eval(stringDict)
                realTree = Graph(tree['vertices'], set(tree['edges']),
                                 tree['edges'])
                trees.append(realTree)
            #Collect the real allele matrix of this simulation
            if re.match('RealA', file):
                currentAlleles = np.loadtxt(subdir + '/' + file, dtype='str')
                alleles.append(currentAlleles)

    return trees, alleles, simulationDataIds
def computeMST(fullGraph, vertexNames):
    """Compute a minimum spanning arborescence and label its edges by name.

    Parameters:
        fullGraph: graph handed to
            ``SpanningArborescence.computeMinimumSpanningArborescence``.
        vertexNames: vertex names; also used to translate the vertex indices
            stored in positions 1 and 2 of every edge tuple.

    Returns:
        A Graph over ``vertexNames`` whose ``edgeList`` and ``edges`` hold
        ``(edge[0], parentName, childName)`` tuples.
    """
    newEdges = SpanningArborescence().computeMinimumSpanningArborescence(
        fullGraph, None)

    #The graph is built once; an earlier throw-away Graph construction that
    #was overwritten before use has been dropped.
    newGraph = Graph(vertexNames, None, None)
    newGraph.setEdges(newEdges)

    #Update the node names in the edges
    updatedEdges = [(edge[0], vertexNames[edge[1]], vertexNames[edge[2]])
                    for edge in newGraph.edgeList]
    newGraph.edgeList = updatedEdges
    newGraph.edges = set(updatedEdges)
    return newGraph
def generateInitialTree(self, vertexNames, dists, aDists, samples,
                        snvAnnotations, alleleAnnotations):
    """Build the full candidate graph from which a tree is inferred later.

    Parameters:
        vertexNames: vertices of the graph.
        dists: distances handed to ``Graph.setEdgesFromDistances``.
        aDists: allele distances, indexed as ``aDists[i, j]``; an infinite
            entry means the edge between samples i and j is removed.
        samples: the samples; only their number is used here.
        snvAnnotations: edge annotations keyed per edge.
        alleleAnnotations: further edge annotations with the same kind of keys.

    Returns:
        The Graph with merged annotations and the impossible edges removed.
    """
    #Generate a full tree with all the edges
    graph = Graph(vertexNames, None, None)
    graph.setEdgesFromDistances(dists)

    #Combine the edge annotations: shared keys are concatenated, new keys copied
    mergedAnnotations = deepcopy(snvAnnotations)
    for key in alleleAnnotations.keys():
        if key not in mergedAnnotations:
            mergedAnnotations[key] = alleleAnnotations[key]
        else:
            mergedAnnotations[key] = mergedAnnotations[key] + alleleAnnotations[key]
    graph.edgeAnnotations = mergedAnnotations

    print(graph)
    print(graph.edgeList)

    #Remove the edges that the allele distances mark as impossible
    sampleCount = len(samples)
    infinity = float("inf")
    for parentInd in range(0, sampleCount):
        for childInd in range(0, sampleCount):
            if aDists[parentInd, childInd] == infinity:
                graph.removeEdge((0, parentInd, childInd))

    return graph
def generateInitialTree(dists, vertexNames):
    """Return a Graph over ``vertexNames`` with its edges set from ``dists``."""
    initialTree = Graph(vertexNames, None, None)
    initialTree.setEdgesFromDistances(dists)
    return initialTree
def readDataIncludingPermutations(dataFolder, noiseLevels):
    """Gather the simulation errors for every noise level.

    For each noise level the folder ``dataFolder + '/snvs_' + str(noiseLevel)``
    is walked. In every directory below it, files are picked up by name prefix:
    'cError', 'aError', 'muError' and 'treeError' are read with
    ``collectErrorsFromFile``; 'RealTrees' and 'EstimatedTrees' are parsed into
    graphs and compared with
    ``computeTreeErrorOtherMetrics.computeAncestrySwapError``.

    Parameters:
        dataFolder: folder that contains the 'snvs_<noiseLevel>' folders.
        noiseLevels: iterable of noise levels.

    Returns:
        ``[groupedCErrors, groupedAErrors, groupedMuErrors, groupedTreeErrors,
        groupedAverageAncestrySwapErrors]``; every entry is a dict that maps a
        noise level to the list of values found for it.
    """

    def loadTree(fileName, folder):
        #Parse the first entry of a tree file into a Graph.
        stringDict = computeTreeErrorOtherMetrics.collectErrorsFromFile(
            fileName, folder)[0]
        #NOTE(review): eval executes the file content as code; only safe for
        #trusted simulation output.
        tree = eval(stringDict)
        return Graph(tree['vertices'], set(tree['edges']), tree['edges'])

    groupedCErrors = dict()
    groupedAErrors = dict()
    groupedMuErrors = dict()
    groupedTreeErrors = dict()
    groupedAverageAncestrySwapErrors = dict()

    for noiseLevel in noiseLevels:
        simulationFolder = dataFolder + '/snvs_' + str(noiseLevel)

        #Read all the errors into one list for this noise level
        cErrors = []
        aErrors = []
        muErrors = []
        treeErrors = []
        averagedAncestrySwapError = []
        #File name prefix -> list that receives the values of that file
        errorLists = [('cError', cErrors), ('aError', aErrors),
                      ('muError', muErrors), ('treeError', treeErrors)]

        for subdir, dirs, files in os.walk(simulationFolder):
            if subdir == simulationFolder:  #we are not interested in the root folder
                continue

            #os.walk lists files in arbitrary order. Handle the real tree
            #first (the sort is stable, so all other files keep their order)
            #and never carry a real tree over from another directory.
            realTree = None
            orderedFiles = sorted(
                files, key=lambda name: not name.startswith('RealTrees'))

            for file in orderedFiles:
                for prefix, errors in errorLists:
                    if file.startswith(prefix):
                        #read the file and obtain the error
                        errors.extend(collectErrorsFromFile(file, subdir))

                if file.startswith('RealTrees'):
                    realTree = loadTree(file, subdir)
                elif file.startswith('EstimatedTrees'):
                    if realTree is None:
                        #No ground truth in this directory, nothing to compare
                        continue
                    inferredTree = loadTree(file, subdir)
                    [ancestrySwapErrorAbsentInInferred,
                     ancestrySwapErrorPresentInInferred,
                     noOfSamplePairs] = computeTreeErrorOtherMetrics.computeAncestrySwapError(
                         realTree, inferredTree)
                    #Instead of reporting the actual errors, what if we report
                    #percentages of how bad we could have done?
                    summedError = (ancestrySwapErrorAbsentInInferred +
                                   ancestrySwapErrorPresentInInferred)
                    averagedAncestrySwapError.append(
                        summedError / float(noOfSamplePairs))

        #Gather the data per noise level
        groupedCErrors[noiseLevel] = cErrors
        groupedAErrors[noiseLevel] = aErrors
        groupedMuErrors[noiseLevel] = muErrors
        groupedTreeErrors[noiseLevel] = treeErrors
        groupedAverageAncestrySwapErrors[noiseLevel] = averagedAncestrySwapError

    #Return the grouped data
    return [
        groupedCErrors, groupedAErrors, groupedMuErrors, groupedTreeErrors,
        groupedAverageAncestrySwapErrors
    ]
muErrorFile = muErrorFiles[0] cError = collectErrorsFromFile(cErrorFile)[0] aError = collectErrorsFromFile(aErrorFile)[0] muError = collectErrorsFromFile(muErrorFile)[0] cErrors.append(cError) aErrors.append(aError) muErrors.append(muError) realTreeFile = glob(subdir + "/RealTrees_1.txt")[0] inferredTreeFile = glob(subdir + "/EstimatedTrees_1.txt")[0] stringDict = computeTreeErrorOtherMetrics.collectErrorsFromFile('RealTrees_1.txt', subdir)[0] tree = eval(stringDict) realTree = Graph(tree['vertices'], set(tree['edges']), tree['edges']) stringDict = computeTreeErrorOtherMetrics.collectErrorsFromFile("EstimatedTrees_1.txt", subdir)[0] tree = eval(stringDict) inferredTree = Graph(tree['vertices'], set(tree['edges']), tree['edges']) #Compute the ancestry swap error [ancestrySwapErrorAbsentInInferred, ancestrySwapErrorPresentInInferred, noOfSamplePairs] = computeTreeErrorOtherMetrics.computeAncestrySwapError(realTree, inferredTree) summedError = (ancestrySwapErrorAbsentInInferred + ancestrySwapErrorPresentInInferred) ancestrySwapError = summedError / float(noOfSamplePairs) treeErrors.append(ancestrySwapError) #Compute the pairwise error for each simulation
def readDataIncludingPermutations(dataFolder, noiseLevels):
    """Gather the errors of the regular and the shuffled simulations.

    For each noise level the folder ``dataFolder + '/snps_' + str(noiseLevel)``
    is walked. Directories whose path contains 'horizontalShuffle' feed the
    shuffled ("P") lists, all other directories feed the regular lists. Files
    are picked up by name prefix: 'cError', 'aError', 'muError' and
    'treeError' are read with ``collectErrorsFromFile``; 'RealTrees' and
    'EstimatedTrees' are parsed into graphs and compared with
    ``computeTreeErrorOtherMetrics.computeAncestrySwapError``.

    Parameters:
        dataFolder: folder that contains the 'snps_<noiseLevel>' folders.
        noiseLevels: iterable of noise levels.

    Returns:
        ``[groupedCErrors, groupedAErrors, groupedMuErrors, groupedTreeErrors,
        groupedAncestrySwapErrors, groupedPCErrors, groupedPAErrors,
        groupedPMuErrors, groupedPTreeErrors, groupedPAncestrySwapErrors]``;
        every entry is a dict that maps a noise level to a list of values.
    """

    def loadTree(fileName, folder):
        #Parse the first entry of a tree file into a Graph.
        stringDict = computeTreeErrorOtherMetrics.collectErrorsFromFile(
            fileName, folder)[0]
        #NOTE(review): eval executes the file content as code; only safe for
        #trusted simulation output.
        tree = eval(stringDict)
        return Graph(tree['vertices'], set(tree['edges']), tree['edges'])

    groupedCErrors = dict()
    groupedAErrors = dict()
    groupedMuErrors = dict()
    groupedTreeErrors = dict()
    groupedAncestrySwapErrors = dict()
    groupedPCErrors = dict()
    groupedPAErrors = dict()
    groupedPMuErrors = dict()
    groupedPTreeErrors = dict()
    groupedPAncestrySwapErrors = dict()

    for noiseLevel in noiseLevels:
        simulationFolder = dataFolder + '/snps_' + str(noiseLevel)

        #Read all the errors into one list for this noise level
        cErrors = []
        aErrors = []
        muErrors = []
        treeErrors = []
        ancestrySwapErrors = []
        pCErrors = []
        pAErrors = []
        pMuErrors = []
        pTreeErrors = []
        pAncestrySwapErrors = []

        #File name prefix -> list that receives the values of that file
        regularLists = [('cError', cErrors), ('aError', aErrors),
                        ('muError', muErrors), ('treeError', treeErrors)]
        shuffledLists = [('cError', pCErrors), ('aError', pAErrors),
                         ('muError', pMuErrors), ('treeError', pTreeErrors)]

        for subdir, dirs, files in os.walk(simulationFolder):
            if subdir == simulationFolder:  #we are not interested in the root folder
                continue

            #Shuffled and regular runs are read identically, only the
            #destination lists differ.
            if re.search('horizontalShuffle', subdir) is not None:
                errorLists = shuffledLists
                swapErrors = pAncestrySwapErrors
            else:
                errorLists = regularLists
                swapErrors = ancestrySwapErrors

            #os.walk lists files in arbitrary order. Handle the real tree
            #first (the sort is stable, so all other files keep their order)
            #and never carry a real tree over from another directory.
            realTree = None
            orderedFiles = sorted(
                files, key=lambda name: not name.startswith('RealTrees'))

            for file in orderedFiles:
                for prefix, errors in errorLists:
                    if file.startswith(prefix):
                        #read the file and obtain the error
                        errors.extend(collectErrorsFromFile(file, subdir))

                if file.startswith('RealTrees'):
                    realTree = loadTree(file, subdir)
                elif file.startswith('EstimatedTrees'):
                    if realTree is None:
                        #No ground truth in this directory, nothing to compare
                        continue
                    inferredTree = loadTree(file, subdir)
                    #Compute the ancestry swap error
                    [ancestrySwapErrorAbsentInInferred,
                     ancestrySwapErrorPresentInInferred,
                     noOfSamplePairs] = computeTreeErrorOtherMetrics.computeAncestrySwapError(
                         realTree, inferredTree)
                    summedError = (ancestrySwapErrorAbsentInInferred +
                                   ancestrySwapErrorPresentInInferred)
                    swapErrors.append(summedError / float(noOfSamplePairs))

        #Gather the data per noise level
        groupedCErrors[noiseLevel] = cErrors
        groupedAErrors[noiseLevel] = aErrors
        groupedMuErrors[noiseLevel] = muErrors
        groupedTreeErrors[noiseLevel] = treeErrors
        groupedAncestrySwapErrors[noiseLevel] = ancestrySwapErrors
        groupedPCErrors[noiseLevel] = pCErrors
        groupedPAErrors[noiseLevel] = pAErrors
        groupedPMuErrors[noiseLevel] = pMuErrors
        groupedPTreeErrors[noiseLevel] = pTreeErrors
        groupedPAncestrySwapErrors[noiseLevel] = pAncestrySwapErrors

    #The two spaces reproduce the output of the former two-item print
    #statements; the single-argument form prints the same text on Python 2.
    print("tree errors:  %s" % (groupedTreeErrors,))
    print("shuffled tree errors:  %s" % (groupedPTreeErrors,))

    #Return the grouped data
    return [groupedCErrors, groupedAErrors, groupedMuErrors, groupedTreeErrors,
            groupedAncestrySwapErrors, groupedPCErrors, groupedPAErrors,
            groupedPMuErrors, groupedPTreeErrors, groupedPAncestrySwapErrors]
def readData(dataFolder, noiseLevels, addition):
    """Gather the simulation errors for every noise level.

    For each noise level the folder ``dataFolder + "_" + str(noiseLevel)`` is
    walked. In every directory below it, files are picked up by name prefix:
    'cError', 'aError', 'muError' and 'treeError' are read with
    ``collectErrorsFromFile``; 'RealTrees' and 'EstimatedTrees' are parsed into
    graphs and compared with
    ``computeTreeErrorOtherMetrics.computeAncestrySwapError``.

    Parameters:
        dataFolder: path prefix of the per-noise-level folders.
        noiseLevels: iterable of noise levels.
        addition: not used by this function; kept for existing callers.

    Returns:
        ``[groupedCErrors, groupedAErrors, groupedMuErrors, groupedTreeErrors,
        groupedAverageAncestrySwapErrors]``; every entry is a dict that maps a
        noise level to the list of values found for it.
    """

    def loadTree(fileName, folder):
        #Parse the first entry of a tree file into a Graph.
        stringDict = computeTreeErrorOtherMetrics.collectErrorsFromFile(
            fileName, folder)[0]
        #NOTE(review): eval executes the file content as code; only safe for
        #trusted simulation output.
        tree = eval(stringDict)
        return Graph(tree['vertices'], set(tree['edges']), tree['edges'])

    groupedCErrors = dict()
    groupedAErrors = dict()
    groupedMuErrors = dict()
    groupedTreeErrors = dict()
    groupedAverageAncestrySwapErrors = dict()

    for noiseLevel in noiseLevels:
        simulationFolder = dataFolder + "_" + str(noiseLevel)

        #Read all the errors into one list for this noise level
        cErrors = []
        aErrors = []
        muErrors = []
        treeErrors = []
        averagedAncestrySwapError = []
        #File name prefix -> list that receives the values of that file
        errorLists = [('cError', cErrors), ('aError', aErrors),
                      ('muError', muErrors), ('treeError', treeErrors)]

        for subdir, dirs, files in os.walk(simulationFolder):
            if subdir == simulationFolder:  #we are not interested in the root folder
                continue

            #os.walk lists files in arbitrary order. Handle the real tree
            #first (the sort is stable, so all other files keep their order)
            #and never carry a real tree over from another directory.
            realTree = None
            orderedFiles = sorted(
                files, key=lambda name: not name.startswith('RealTrees'))

            for file in orderedFiles:
                for prefix, errors in errorLists:
                    if file.startswith(prefix):
                        #read the file and obtain the error
                        errors.extend(collectErrorsFromFile(file, subdir))

                if file.startswith('RealTrees'):
                    realTree = loadTree(file, subdir)
                elif file.startswith('EstimatedTrees'):
                    if realTree is None:
                        #No ground truth in this directory, nothing to compare
                        continue
                    inferredTree = loadTree(file, subdir)
                    [ancestrySwapErrorAbsentInInferred,
                     ancestrySwapErrorPresentInInferred,
                     noOfSamplePairs] = computeTreeErrorOtherMetrics.computeAncestrySwapError(
                         realTree, inferredTree)
                    summedError = (ancestrySwapErrorAbsentInInferred +
                                   ancestrySwapErrorPresentInInferred)
                    averagedAncestrySwapError.append(
                        summedError / float(noOfSamplePairs))

        #Gather the data per noise level
        groupedCErrors[noiseLevel] = cErrors
        groupedAErrors[noiseLevel] = aErrors
        groupedMuErrors[noiseLevel] = muErrors
        groupedTreeErrors[noiseLevel] = treeErrors
        groupedAverageAncestrySwapErrors[noiseLevel] = averagedAncestrySwapError

    #Return the grouped data
    return [
        groupedCErrors, groupedAErrors, groupedMuErrors, groupedTreeErrors,
        groupedAverageAncestrySwapErrors
    ]
def build_order_2(v, e):
    """Construct a Graph from ``v``.

    In the code visible here ``e`` is not used and nothing is returned.
    NOTE(review): the body looks truncated - confirm against the full source.
    """
    graph = Graph(v)
def updateTree(self, fullGraph, allSomaticVariants, samples):
    """Infer a tree from ``fullGraph`` and report it with a status message.

    A minimum spanning arborescence is computed on ``fullGraph``. When no
    arborescence exists, or one exists that does not place every sample while
    ``settings.trees['precursor']`` is True, the edges are obtained from
    ``fullGraph.addPrecursorNode`` instead. When no arborescence exists and
    the precursor setting is False, the search is given up.

    Resolving ISA violations by removing edges is currently switched off
    (``badEdges`` is forced to None), so the first tree that is built is
    accepted.

    Parameters:
        fullGraph: graph with all candidate edges; edges are
            ``(weight, parent, child)`` tuples.
        allSomaticVariants: somatic variants handed to the arborescence and
            validity computations.
        samples: the samples; sample indices double as vertex indices.

    Returns:
        ``[tree, message]``. ``message`` is empty unless the tree could not be
        resolved or fewer than 80% of the samples were placed.
    """

    def placedChildren(edges):
        #Children (edge[2]) of the given edges that are sample indices.
        return [edge[2] for edge in edges
                if edge[2] in range(0, len(samples))]

    #Keep untouched copies: the precursor step starts again from the original
    #graph and somatic variants.
    originalGraph = deepcopy(fullGraph)
    originalSomaticVariants = deepcopy(allSomaticVariants)
    newSomaticVariants = deepcopy(allSomaticVariants)
    vertexNames = fullGraph.vertices
    #we will always miss sample 0, this is a parent and not a child.
    expectedChildren = len(samples) - 1

    unresolved = False
    resolved = False

    print("reconstructing tree: ")

    #Trees are stored by their number of ISA violations. If the ISA cannot be
    #resolved we report the tree with the fewest violations; among those, the
    #tree with the smallest total weight.
    treesAndIsaViolations = dict()
    newGraph = Graph(vertexNames, set(), [])

    while resolved is False:
        newEdges = SpanningArborescence().computeMinimumSpanningArborescence(
            fullGraph, newSomaticVariants)

        #Check if all samples are present as a child. If not, then we also
        #introduce a precursor.
        if newEdges is not False:
            precursorNeeded = False
            childrenFound = placedChildren(newEdges)
            if len(childrenFound) != expectedChildren:
                print(newEdges)
                print("Warning: the tree misses nodes")
                precursorNeeded = True

        if newEdges is False and settings.trees['precursor'] is False:
            treesAndIsaViolations[float("inf")] = []
            treesAndIsaViolations[float("inf")].append(deepcopy(newGraph))
            unresolved = True
            break

        #We only attempt to introduce a precursor if this is specified in the
        #settings. precursorNeeded is only looked at when newEdges is not
        #False, in which case it was set above.
        if newEdges is False or (precursorNeeded is True
                                 and settings.trees['precursor'] is True):
            #we end up here when there is no more possible tree. In this case,
            #we need to reset the tree and add precursor nodes.
            [newEdges, newSomaticVariants] = fullGraph.addPrecursorNode(
                originalGraph, originalSomaticVariants, samples)

        #The vertex list always reserves index len(samples) for a precursor
        #state.
        newVertices = deepcopy(vertexNames)
        newVertices.append(len(samples))
        newGraph = Graph(newVertices, None, None)
        newGraph.setEdges(newEdges)
        newGraph.edgeAnnotations = fullGraph.edgeAnnotations

        badEdgeData = newGraph.checkIfTreeIsValid(newSomaticVariants)
        violatingEdges = badEdgeData[1]

        #In this case, we do not want to resolve the ISA and the first
        #reported tree is the minimum distance tree. Thus, the bad edges are
        #None by default.
        badEdges = None
        if badEdges is None:
            resolved = True
            break

        #Everything below in this loop is unreachable while badEdges is forced
        #to None; it is kept for when ISA resolution is switched on again.

        #The number of violating edges is a score for the trees.
        if len(violatingEdges) not in treesAndIsaViolations.keys():
            treesAndIsaViolations[len(violatingEdges)] = []
        treesAndIsaViolations[len(violatingEdges)].append(deepcopy(newGraph))

        print("bad edges:  %s" % (badEdges,))
        #Remove the bad edge with the largest distance
        currentLargestDist = -float("inf")
        currentWorstEdge = 0
        if len(badEdges) > 0:
            for edge in badEdges:
                print("current edge:  %s" % (edge,))
                totalDistance = edge[0]
                print("total distance:  %s" % (totalDistance,))
                if totalDistance > currentLargestDist:
                    currentWorstEdge = edge
                    currentLargestDist = totalDistance
            #remove the problematic edge
            print("removing edge:  %s" % (currentWorstEdge,))
            fullGraph.removeEdge(currentWorstEdge)

    #check if the new graph contains all nodes, throw this warning only at
    #the end
    childrenFound = placedChildren(newGraph.edges)
    if len(childrenFound) != expectedChildren:
        print("Warning: missing too many samples to resolve the ISA, reporting truncated trees")

    #A tree that misses one or two nodes can still be reported. If fewer than
    #80% of the nodes are placed, we fall back to the best stored tree.
    #True division is required here: on Python 2 the integer division used
    #before turned every incomplete tree into a ratio of 0.
    minimumTreeContent = 0.8
    if expectedChildren > 0:
        placedFraction = len(childrenFound) / float(expectedChildren)
    else:
        #A single sample has no children to place
        placedFraction = 1.0
    tooFewPlaced = placedFraction < minimumTreeContent
    if tooFewPlaced:
        print("Less than 80% of nodes are placed in the evolutionary tree. Reporting the minimum spanning tree instead, ISA is not resolved")

    message = ""
    if unresolved is True or tooFewPlaced:
        #if we did not succeed with introducing a precursor we should also
        #report the best tree
        print("Did not resolve the ISA, selecting the tree with the fewest violations")
        message = "Did not resolve the ISA, reporting the tree with the fewest ISA violations"

        bestKey = float("inf")
        for violationKey in treesAndIsaViolations.keys():
            if violationKey < bestKey:
                bestKey = violationKey

        #If the bestKey is infinite, we were unable to reconstruct a tree.
        if bestKey == float("inf"):
            bestTree = newGraph
            print("the best tree:  %s" % (bestTree.edgeList,))
            print("Did not find a correct tree")
            #Same [tree, message] shape as the regular exit, callers unpack it
            return [bestTree, message]

        #Of the trees with this number of violations, take the lightest one
        bestTree = None
        bestTreeWeight = float("inf")
        for tree in treesAndIsaViolations[bestKey]:
            if tree.getTotalWeight() < bestTreeWeight:
                bestTree = tree
                bestTreeWeight = tree.getTotalWeight()

        newGraph = deepcopy(bestTree)

    return [newGraph, message]
def run(self, samples):
    """Alternate between inferring C/mu per sample and inferring a tree.

    Starting from a graph in which vertex 0 is the parent of every other
    sample, each iteration infers the best C and mu per sample for the current
    graph, derives distances, builds the full graph and calls ``updateTree``.
    The loop stops when the iteration number equals
    ``settings.general['maximumIterations'] - 1`` or when the new graph equals
    a graph stored in ``graphList``.

    Parameters:
        samples: the samples to process; each has a ``name``.

    Returns:
        ``[cMatrix, aMatrix, samples, iterationGraphs, iterationMu,
        iterationMessages]``. The matrices are those of the last iteration;
        ``iterationMu`` and ``iterationMessages`` are dicts keyed by iteration
        number; ``iterationGraphs`` is what ``updateTreeNodeNames`` returns
        for the per-iteration graphs.
    """
    #NOTE(review): eventDistances, cCombinations, measurementLength, edges and
    #start_time are assigned but not read again in this method.
    eventDistances = self.eventDistances
    cCombinations = self.cCombinations

    #get all somatic variants in one binary numpy array
    allSomaticVariants = self.mergeVariantsIntoMatrix(samples)

    #set the segmentation
    samples = self.updateSampleSegmentation(samples)

    measurementLength = len(samples[0].measurements.measurements)

    #Define the original graph, here the parent is always the healthy cell for every subclone.
    vertexNames = []  #use this to convert back to the actual names later
    for sample in samples:
        vertexNames.append(sample.name)

    vertices = range(0, len(samples))

    #Define the first graph, where node 0 is the parent of all subclones.
    #Edges are (weight, parent, child) tuples; all weights start at 0.
    edgeList = []
    for i in range(1, len(vertices)):
        edgeList.append((0, 0, i))

    currentGraph = Graph(vertices, set(edgeList), edgeList)

    maxIterNum = settings.general['maximumIterations'] - 1
    converged = False
    iteration = 0
    graphList = []  #store all the graphs that we have made up until that point

    #Per-iteration bookkeeping, keyed by iteration number
    iterationGraphs = dict()
    edges = dict()
    iterationMu = dict()  #store the mu of the samples per iteration
    iterationMessages = dict()

    while converged is not True:
        start_time = time.time()
        #This is where we get back after one iteration. The samples need to have their parents updated based on the current graph
        print "iteration: ", iteration

        #1. Infer the best C and mu combination per sample given our current tree
        samples = self.inferBestCMu(samples, currentGraph)

        #The best C and mu are stored for each sample, we wish to obtain this information for each sample individually and store it in a matrix that we can use to compute distances easily to infer a tree.
        [cMatrix, aMatrix] = self.getCAndAMatrices(samples)

        #Compute the distance matrix that we use to infer the tree
        [dists, aDists, sDists, snvAnnotations, alleleAnnotations] = self.computeDistanceMatrix(aMatrix, samples)

        #Generate the initial tree
        fullGraph = self.generateInitialTree(vertexNames, dists, aDists, samples, snvAnnotations, alleleAnnotations)

        #Update the tree to resolve the ISA
        [newGraph, message] = self.updateTree(fullGraph, allSomaticVariants, samples)

        #Convergence check: stop at the maximum iteration, or as soon as the
        #new graph is the same as one of the graphs seen before.
        if iteration == maxIterNum:
            converged = True
        else:
            for previousGraph in graphList:
                if newGraph.compareIfGraphIsTheSame(previousGraph) is True:
                    converged = True

        #The graph of the previous iteration is remembered for later
        #comparisons, the new graph becomes the current one.
        graphList.append(currentGraph)
        currentGraph = newGraph

        # newEdgeList = []
        # for edge in newGraph.edgeList: #append the edges to the unique list, we will print this later
        # 	newEdge = (0, edge[1], edge[2])
        # 	newEdgeList.append(newEdge)
        # 	if newEdge not in uniqueEdges:
        # 		uniqueEdges.append(newEdge)
        #
        # edges[iteration] = newEdgeList

        print "best graph: "
        print currentGraph.getGraph()
        #print the total distance in the graph as well
        print currentGraph.getTotalWeight()

        #Store the graph of each iteration under its iteration number.
        iterationGraphs[iteration] = currentGraph

        #Loop through the current mu (tumor fraction) of the samples, store these in a dictionary
        allMu = []
        for sample in samples:
            allMu.append(sample.bestCMu[0].mu.mu[1])

        iterationMu[iteration] = allMu
        iterationMessages[iteration] = message
        iteration += 1

    #Convert the vertices of the stored graphs back to the sample names
    iterationGraphs = self.updateTreeNodeNames(iterationGraphs, vertexNames)

    return [cMatrix, aMatrix, samples, iterationGraphs, iterationMu, iterationMessages]  #are the samples references between classes? Otherwise we may not need to return it for the mu values.
#Read the estimated matrices from the folder given as second command line
#argument.
eAMatrix = np.loadtxt(sys.argv[2] + '/aMatrix.txt', dtype=str)
eMu = np.loadtxt(sys.argv[2] + '/EstimatedMu.txt', dtype=float)

#Obtain the tree: keep the non-empty lines of the file, the first one holds
#the tree description
text_file = open(sys.argv[2] + '/EstimatedTree.txt', "r")
lines = text_file.read()
stringDict = []
for line in lines.split("\n"):
    if line != "":
        stringDict.append(line)
text_file.close()

#NOTE(review): eval executes the file content as code; only safe for trusted
#input. The result is used as a dict with 'vertices' and 'edges' keys.
tree = eval(stringDict[0])
eTree = Graph(tree['vertices'], set(tree['edges']), tree['edges'])

#Also read the real matrices with the ground truth, from the folder given as
#third command line argument
cMatrix = np.loadtxt(sys.argv[3] + '/RealC.txt', dtype=int)
aMatrix = np.loadtxt(sys.argv[3] + '/RealA.txt', dtype=str)
realMu = np.loadtxt(sys.argv[3] + '/RealMu.txt', dtype=float)

#Obtain the tree: same line filtering as for the estimated tree.
#NOTE(review): this file handle is not closed within the lines shown here.
text_file = open(sys.argv[3] + '/RealTree.txt', "r")
lines = text_file.read()
stringDict = []
for line in lines.split("\n"):
    if line != "":
        stringDict.append(line)