def set_up_competition(self, best_fit):
    """Build one competitor (candidate common ancestor) per taxon.

    For every taxon index t, the list self.coalescent()[t] is merged with
    the lists self.coalescent()[r] for each r in best_fit[t]. After each
    merge the result goes through _image_of_partition (used throughout this
    file to drop repeated entries), and the final list is sorted so that a
    given union always has the same representative.

    Parameters:
        best_fit: indexable by taxon index; best_fit[t] is an iterable of
            taxon indices whose lists are merged with that of taxon t.
            When best_fit[t] is empty, the competitor of t is just the
            de-duplicated, sorted list of t itself.

    Returns:
        A list whose t-th entry is the competitor built for taxon t.
    """
    #The lists to be merged all come from the output of self.coalescent().
    generations = self.coalescent()
    competitors = []
    for taxon, own_generation in enumerate(generations):
        #Start from the taxon's own list so that an empty best_fit[taxon]
        #still yields a competitor.
        merged = _image_of_partition(own_generation)
        for friend in best_fit[taxon]:
            merged = _image_of_partition(merged + generations[friend])
        #Sorting gives a unique representative to the union.
        merged.sort()
        competitors.append(merged)
    return competitors
def _join_preimages_of_partitions(preimage1, preimage2, speed_mode):
    """Merge the internal lists of two lists of lists that share an element.

    Each internal list of preimage1 absorbs every internal list of
    preimage2 (and every previously merged list) in which one of its
    elements is found; the merged lists are collected and returned.

    Parameters:
        preimage1, preimage2: lists of lists. Working copies are built by
            passing each internal list through _image_of_partition, which
            also removes repeated entries (e.g. [7,1,3,4,7] --> [7,1,3,4]);
            the inputs are presumably left unmodified -- this relies on
            _image_of_partition returning a new list (defined elsewhere).
        speed_mode: when equal to the module-level constant FAST, the
            search for a given element stops at the first internal list
            that contains it.

    Returns:
        The list of non-empty merged internal lists.
    """
    #Working copies: 'pending' plays the role of preimage1 and 'merged'
    #that of preimage2.
    pending = [_image_of_partition(fiber) for fiber in preimage1]
    merged = [_image_of_partition(fiber) for fiber in preimage2]

    for a in range(len(pending)):
        #The positions are fixed before the list starts growing: elements
        #absorbed during this pass are not themselves searched for.
        for pos in range(len(pending[a])):
            for b in range(len(merged)):
                #Is the pos-th element of pending[a] present in merged[b]?
                found = any(pending[a][pos] == member for member in merged[b])
                if found:
                    #merged[b] is absorbed by pending[a] and emptied, then
                    #the repetitions created by the union are removed.
                    pending[a].extend(merged[b])
                    merged[b] = []
                    pending[a] = _image_of_partition(pending[a])
                #In FAST mode the element is no longer searched for once
                #it has been found.
                if speed_mode == FAST and found:
                    break
        #The union is moved to 'merged' so that the following internal
        #lists of 'pending' can, in turn, be merged with it.
        merged.append(pending[a])
        pending[a] = []

    #Only the non-empty lists are part of the output.
    return [fiber for fiber in merged if fiber != []]
def make_friends(self, taxon):
    """List the taxa that may be related to `taxon` and the matching unions.

    A taxon index r is a 'friend' of `taxon` when r does not belong to
    self.coalescent()[taxon]. For each friend, the union of
    self.coalescent()[taxon] and self.coalescent()[r] is computed through
    _image_of_partition and sorted, so that [0,1]U[2,5] and [2,5]U[0,1]
    have the same representative.

    Parameters:
        taxon: index of the taxon in the output of self.coalescent().

    Returns:
        A pair (friends, coalescence_hypothesis) of lists of equal length:
        the friend indices and, at the same positions, the sorted unions.
    """
    generations = self.coalescent()
    friends = []
    unions = []
    for candidate, generation in enumerate(generations):
        #Indices already present in the list of 'taxon' are not friends.
        if candidate in generations[taxon]:
            continue
        union = _image_of_partition(generations[taxon] + generation)
        union.sort()
        friends.append(candidate)
        unions.append(union)
    return (friends, unions)
def __init__(self, source, target, *args):
    """Build the morphism of partitions from `source` to `target`.

    Both inputs are relabeled with _epi_factorize_partition and stored in
    self.source and self.target. self.arrow is then computed so that
    self.arrow[i] is the label of self.target associated with the label i
    of self.source.

    Parameters:
        source, target: lists of the same length, seen as partitions.
        *args: if a first extra argument is given and equals False, the
            error message is not printed when the construction fails.

    Raises:
        SystemExit: if source and target do not have the same length, or
            if some label of the source is paired with more than one label
            of the target (no morphism exists).
    """
    #The optional flag only controls whether the error message is printed.
    quiet = len(args) > 0 and args[0] == False

    #Without this check an instance with no attribute at all would be
    #silently created, and callers testing for the existence of a morphism
    #would wrongly conclude that there is one.
    if len(source) != len(target):
        if not quiet:
            print("Error: in MorphismOfPartitions.__init__: source and target do not have the same length.")
        #SystemExit is raised directly: the site-provided exit() is meant
        #for interactive use and closes sys.stdin before raising it.
        raise SystemExit

    #Relabeling the source and target makes it possible to quickly know
    #whether there is an arrow from the source to the target (see below).
    self.source = _epi_factorize_partition(source)
    self.target = _epi_factorize_partition(target)

    #Binary relation between the labels of the source and those of the
    #target, without repeated pairs. The pairs are materialized in a list
    #because zip() returns a one-shot iterator on Python 3.
    self.arrow = _image_of_partition(list(zip(self.source, self.target)))

    #The relation is a function when the i-th pair starts with the label i,
    #i.e. when every label of the source is paired with a single label of
    #the target.
    for i in range(len(self.arrow)):
        if self.arrow[i][0] == i:
            #Only the image (not the graph) of the function is kept.
            self.arrow[i] = self.arrow[i][1]
        else:
            if not quiet:
                print("Error: in MorphismOfPartitions.__init__: source and target are not compatible.")
            raise SystemExit
def _epi_factorize_partition(partition):
    """Relabel the elements of a list by their position in its image.

    The image is computed with _image_of_partition. If an element of
    `partition` is equal to the j-th element of the image, it is replaced
    by the integer j.

    Parameters:
        partition: a list, seen as a partition of its set of indices.

    Returns:
        The list of integer labels, in the order of `partition`.
    """
    representatives = _image_of_partition(partition)
    labels = []
    for element in partition:
        #Linear search on equality: the elements may be unhashable
        #(e.g. lists), so no dictionary lookup is used.
        for label, representative in enumerate(representatives):
            if element == representative:
                labels.append(label)
                break
    return labels
def _preimage_of_partition(partition):
    """Return the fibers of a list seen as a partition.

    There is one fiber per element of _image_of_partition(partition). The
    index i of `partition` is stored in the fiber whose position is the
    label given to partition[i] by _epi_factorize_partition.

    Parameters:
        partition: a list, seen as a partition of its set of indices.

    Returns:
        A list of lists of indices of `partition`.
    """
    #One (initially empty) fiber per element of the image.
    fibers = [[] for _ in _image_of_partition(partition)]
    #The relabeled list gives the fiber to which each index belongs.
    for index, label in enumerate(_epi_factorize_partition(partition)):
        fibers[label].append(index)
    return fibers
def score(self, partitions, friendship_network):
    """Compute the large and exact scores of the hypothetical ancestors.

    Parameters:
        partitions: list of lists, each seen as a partition.
        friendship_network: pair (friendships, hypotheses) where
            hypotheses[t][r] is the hypothetical ancestor (a list of taxa)
            associated with the taxon t and its friend friendships[t][r].

    Returns:
        A list of lists such that the entry [t][r] is the triple
        (friendships[t][r], large, exact), where 'large' and 'exact' are
        the large and exact scores of the ancestor hypotheses[t][r].
        Ancestors of the same taxon t that are equal as lists share the
        same scores.
    """

    def homset(partition1, partition2):
        #Tells whether there is a morphism of partitions from partition1
        #to partition2. The constructor reports a failure by terminating
        #(SystemExit), hence the explicit mention of that exception;
        #KeyboardInterrupt is deliberately not intercepted.
        try:
            MorphismOfPartitions(partition1, partition2, False)
        except (SystemExit, Exception):
            return False
        return True

    def exact_condition(list1, list2):
        #Tells whether the two lists are either disjoint or made of the
        #same elements.
        intersection = [k for k in list1 if k in list2]
        if intersection == []:
            return True
        #Both lists must be included in their intersection.
        return (all(k in intersection for k in list1)
                and all(k in intersection for k in list2))

    #STEP 1:
    #score_matrix[i][t] contains the distinct pairs (flag, label) where
    #- 'label' identifies, up to list equality within hypotheses[t], an
    #  ancestor for which there is a morphism of partitions from the
    #  partition built on that ancestor to partitions[i] (the 'large score
    #  condition');
    #- 'flag' tells whether this ancestor is, in addition, equal to or
    #  disjoint from every other ancestor of hypotheses[t] satisfying the
    #  large score condition (the 'exact score condition').
    hypotheses = friendship_network[1]
    labeling = [_epi_factorize_partition(ancestors) for ancestors in hypotheses]

    score_matrix = []
    for partition in partitions:
        score_row = []
        for t, ancestors in enumerate(hypotheses):
            #Ancestors satisfying the large score condition, with labels.
            retained = []
            retained_labels = []
            for r, ancestor in enumerate(ancestors):
                #NOTE(review): x is presumably the partition of the set of
                #taxa whose only non-trivial part is 'ancestor' -- to be
                #confirmed against EquivalenceRelation.
                x = EquivalenceRelation([ancestor], len(self.phylogeneses) - 1)
                if homset(x.quotient(), partition):
                    retained.append(ancestor)
                    retained_labels.append(labeling[t][r])
            score_coeff = []
            for r, ancestor in enumerate(retained):
                flag = all(
                    exact_condition(ancestor, other)
                    for s, other in enumerate(retained)
                    if s != r
                )
                score_coeff.append((flag, retained_labels[r]))
            #Repeated pairs are removed so that an ancestor is counted at
            #most once per partition.
            score_row.append(_image_of_partition(score_coeff))
        score_matrix.append(score_row)

    #STEP 2:
    #score_cardinality[t][label] = [large, exact] counts, over all the
    #partitions, the pairs found in STEP 1 for the given label.
    score_cardinality = [
        [[0, 0] for _ in _image_of_partition(labels)]
        for labels in labeling
    ]
    for score_row in score_matrix:
        for t, coefficients in enumerate(score_row):
            for (flag, label) in coefficients:
                counts = score_cardinality[t][label]
                counts[0] += 1
                if flag:
                    counts[1] += 1

    #STEP 3:
    #The scores are re-indexed by the friends of each taxon t instead of
    #the labels of the hypothetical ancestors.
    friendships = friendship_network[0]
    return [
        [
            (friendships[t][r],
             score_cardinality[t][label][0],
             score_cardinality[t][label][1])
            for r, label in enumerate(labels)
        ]
        for t, labels in enumerate(labeling)
    ]
def extend(self, extension):
    """Add a new generation to the phylogeny, unless it is complete.

    Parameters:
        extension: list of pairs (t, l) where t is a taxon index and l is
            the list proposed as the new generation of taxon t. The list l
            must contain every element of the last list stored in
            self.phylogeneses[t].history.

    Returns:
        False if no pair of `extension` brings a new individual (the
        phylogeny is then left unchanged); True otherwise, in which case
        one list has been appended to the history of every taxon.

    The procedure prints an error message and calls exit() when a pair of
    `extension` is not compatible with the history of its taxon.
    """
    #Becomes True as soon as a pair of 'extension' brings an individual
    #that is not in the last stored generation of its taxon.
    incomplete = False
    for taxon in range(len(self.phylogeneses)):
        for pair in extension:
            if pair[0] != taxon:
                continue
            latest = self.phylogeneses[taxon].history[-1]
            #The proposed generation must contain the last stored one.
            if any(individual not in pair[1] for individual in latest):
                print(
                    "Error: in Phylogeny.extend: the extension is not compatible with the phylogenesis of taxon "
                    + str(taxon))
                exit()
            #Does the proposed generation bring new individuals?
            if any(individual not in latest for individual in pair[1]):
                incomplete = True

    if not incomplete:
        #The phylogeny is complete: nothing is added.
        return False

    for taxon, phylogenesis in enumerate(self.phylogeneses):
        for pair in extension:
            if pair[0] == taxon:
                #_image_of_partition eliminates the repetitions of integers
                #that can occur in the proposed generation. Only the first
                #pair associated with the taxon is used.
                phylogenesis.history.append(_image_of_partition(pair[1]))
                break
        else:
            #The taxon does not appear in 'extension': its last stored
            #generation is repeated (the same list object is appended).
            phylogenesis.history.append(phylogenesis.history[-1])
    #Another run is necessary to determine whether the phylogeny is now
    #complete.
    return True