def computeDiversityCoefficient(numberNodesInTree,sampleNameList,dataArray):
    """Compute the share of tree nodes hit by a group of samples.

    @numberNodesInTree is the number of nodes in the whole taxonomic tree
    (used as the denominator of the coefficient).
    @sampleNameList is the list of sample names handed to takeNodesInTree.
    @dataArray is an indexable container; only dataArray[7] is read, and it
    is passed to takeNodesInTree as its first argument (presumably the
    taxonomic tree -- TODO confirm against the caller).

    Returns (coefficient,resultNodes) where resultNodes is a list of
    ((name,rank),assignments) pairs, assignments being the sum of hit[1]
    over the sample hit list of the node. The coefficient is 0 when
    takeNodesInTree reports no node, else numberNodes/numberNodesInTree
    as a float.

    Raises ValueError if a node is not a 3-element sequence, if a hit is not
    a 2-element sequence, if no node was reported and the node list is empty,
    or if @numberNodesInTree is zero/empty. The error message is printed
    (as before) and also attached to the exception.
    """
    #@sample is a list of (name of node,rank of node,sampleHitList)
    sample,_,numberNodes = takeNodesInTree(dataArray[7],sampleNameList)
    resultNodes = []
    for node in sample:
        if len(node) != 3:
            message = " \n/!\\ ERROR: List of samples wrong: %s ." % (len(node),)
            print(message)
            raise ValueError(message.strip())
        name,rank,sampleHitList = node
        assignments = 0
        for hit in sampleHitList:
            if len(hit) != 2:
                message = "\n/!\\ ERROR: Sample Hit List wrong: %s ." % (len(hit),)
                print(message)
                raise ValueError(message.strip())
            #hit[1] is accumulated as the number of assignments of this hit
            assignments += hit[1]
        resultNodes.append(((name,rank),assignments))
    if not numberNodes:
        if not len(sample):
            message = "\n/!\\ ERROR: [BUG] [percentage/computeSamplesInAllMetadatum] Empty set of samples."
            print(message)
            raise ValueError(message.strip())
        return 0,resultNodes
    if not numberNodesInTree:
        message = "\n/!\\ ERROR: Taxonomic Tree is empty: whole tree: %s ." % (numberNodesInTree,)
        print(message)
        raise ValueError(message.strip())
    #Explicit float conversion: with Python 2 integers "/" is a floor division,
    #which would reduce the coefficient to 0 (or 1)
    return (float(numberNodes)/numberNodesInTree),resultNodes
def compute(tree,sampleNameList1,sampleNameList2):
    """Split the nodes matched by two groups of samples into common/specific.

    @tree is passed unchanged to takeNodesInTree.
    @sampleNameList1 and @sampleNameList2 are the two lists of sample names.

    Returns the tuple
    (common,in1,in2,numberAT1,numberAT2,numberN1,numberN2,numberC) where:
      - common is a list of (node[0],node[1],node[2],sampleHitList) for each
        node of the first group that memAndSampleHitList also finds in the
        second group (sampleHitList being the second value it returns);
      - in1 is the list of nodes of the first group that are not common;
      - in2 is the list of nodes of the second group that are not common
        (compared on node[0] and node[1], i.e. name and rank);
      - numberAT1,numberN1 (resp. numberAT2,numberN2) are the second and
        third values returned by takeNodesInTree for each group;
      - numberC is the number of nodes in common.
    """
    nodeList1,numberAT1,numberN1 = takeNodesInTree(tree,sampleNameList1)
    nodeList2,numberAT2,numberN2 = takeNodesInTree(tree,sampleNameList2)
    common, in1, in2 = [],[],[]
    for node in nodeList1:
        #@isCommon answers: does node also belong to the second list of samples?
        isCommon,sampleHitList = memAndSampleHitList(node,nodeList2)
        if isCommon:
            common.append((node[0],node[1],node[2],sampleHitList))
        else:
            in1.append(node)
    #The nodes specific to the second group must be taken from nodeList2
    #(iterating over nodeList1 here would merely rebuild in1)
    for node in nodeList2:
        #@isSpecific answers: is it a node that does not belong to the first list of samples?
        isSpecific = True
        for cNode in common:
            if node[0] == cNode[0] and node[1] == cNode[1]:
                isSpecific = False
                break
        if isSpecific:
            in2.append(node)
    #@numberC is the number of nodes in common
    numberC = len(common)
    return common,in1,in2,numberAT1,numberAT2,numberN1,numberN2,numberC
def enumerateCommonPatterns(tree,sampleNameList1,sampleNameList2):
    """Enumerate the patterns shared by two groups of samples.

    For every node returned by takeNodesInTree for @sampleNameList1, the
    subtree of @tree rooted at that node is explored; a node is kept in the
    pattern when its sampleHitList holds at least one hit for each group, and
    only the children of kept nodes are explored further.

    Returns a list of (pattern,numberAssignments,numberNodes) where pattern is
    a non-empty list of (name,rank) pairs, numberAssignments is the sum of
    hit[1] over the merged hits of the kept nodes, and numberNodes is the
    number of nodes in the pattern.
    """
    patterns = []
    #@startNodes is the list of nodes (name,rank,sampleHitList) for sampleNameList1 (see misc.py)
    startNodes,_,_ = takeNodesInTree(tree,sampleNameList1)
    #Each of these nodes is tried as the root of a pattern
    for entry in startNodes:
        members = []
        totalAssignments = 0
        #@stack holds the TaxoTree nodes still to be examined, starting with
        #the node of the whole taxonomic tree matching (name,rank)
        stack = [tree.search(entry[0],entry[1])]
        while stack:
            current = stack.pop()
            hits = current.sampleHitList
            hitsIn1 = [hit for hit in hits if inSample(hit,sampleNameList1)]
            hitsIn2 = [hit for hit in hits if inSample(hit,sampleNameList2)]
            #A node assigned in only one group (or in none) stops the exploration of its branch
            if not (hitsIn1 and hitsIn2):
                continue
            #Hits are merged without duplicates: a sample belonging to both
            #groups would otherwise have its assignments counted twice
            mergedHits = mergeList(hitsIn1,hitsIn2)
            members.append((current.name,current.rank))
            for hit in mergedHits:
                totalAssignments += hit[1]
            stack.extend(current.children)
        #Empty patterns are not reported
        if members:
            patterns.append((members,totalAssignments,len(members)))
    return patterns
def enumerateSpecificPatterns(tree,sampleNameListPattern,sampleNameListOther):
    """Enumerate the patterns specific to one group of samples.

    @sampleNameListPattern is first trimmed with trimList so that samples
    also present in @sampleNameListOther are removed. For every node returned
    by takeNodesInTree for the trimmed list, the subtree of @tree rooted at
    that node is explored; a node is kept in the pattern when its
    sampleHitList holds at least one hit from the trimmed list and no hit
    from @sampleNameListOther. Only the children of kept nodes are explored.

    Returns a list of (pattern,numberAssignments,numberNodes) where pattern is
    a non-empty list of (name,rank) pairs, numberAssignments is the sum of
    hit[1] over the retained hits, and numberNodes is the number of nodes in
    the pattern.
    """
    specificPatternsList = []
    #List from where samples in both lists are deleted and only elements from sampleNameListPattern remain
    sampleNameListPatternTrimmed = trimList(sampleNameListPattern,sampleNameListOther)
    #@nodesList is the list of nodes (name,rank,sampleHitList) for sampleNameListPatternTrimmed
    nodesList,_,_ = takeNodesInTree(tree,sampleNameListPatternTrimmed)
    #Pretty much the same procedure as for @enumerateCommonPatterns
    for node in nodesList:
        pattern = []
        numberAssignments = 0
        name,rank = node[0],node[1]
        root = tree.search(name,rank)
        candidateNodes = [root]
        while candidateNodes:
            child = candidateNodes.pop()
            isInSampleListPattern = []
            for x in child.sampleHitList:
                if inSample(x,sampleNameListOther):
                    #This node is assigned in the samples of sampleNameListOther
                    #so it is discarded from the pattern: the hits collected
                    #so far must be dropped (the list itself, not a new variable)
                    isInSampleListPattern = []
                    break
                if inSample(x,sampleNameListPatternTrimmed):
                    isInSampleListPattern.append(x)
            if isInSampleListPattern:
                pattern.append((child.name,child.rank))
                for x in isInSampleListPattern:
                    numberAssignments += x[1]
                candidateNodes += child.children
        #if the pattern is non-empty
        if pattern:
            #One node is counted per element appended to the pattern
            specificPatternsList.append((pattern,numberAssignments,len(pattern)))
    return specificPatternsList