def refineReconciledTreeWithTransferBack(RT):
    """
    Prepends the transferBack events that were left implicit in the tree.

    A non-root node receives a leading "Tb" event (located in the species of
    its current first event) when:
        - the last event of its parent is an outgoing transfer
          ("speciationOut" / "So"), and
        - its own first event is in a different species than that transfer, and
        - its own first event is not already a transferBack / bifurcationOut.

    *modifies RT in place*

    Takes:
        - RT (ReconciledTree) : a reconciled tree obtained from an ale string
    """
    outgoingCodes = ("speciationOut", "So")
    receptionCodes = ("Tb", "transferBack", "Bo", "bifurcationOut")

    for node in RT.traverse():
        if node.is_root():
            continue

        parentLast = node.up.getEvent(-1)
        if parentLast.eventCode not in outgoingCodes:
            continue  ## parent is not an outgoing transfer: nothing to add

        ownFirst = node.getEvent(0)
        if ownFirst.species == parentLast.species:
            continue  ## this is the "kept" child
        if ownFirst.eventCode in receptionCodes:
            continue  ## already the correct annotation

        ## append=False puts the new event at the start of the chain
        node.addEvent(RecEvent("Tb", species=ownFirst.species), append=False)
def refineReconciledTreeLosses(RT, spTree):
    """
    Refines the annotation of the loss nodes of a reconciled tree.

    For every non-root node whose first event is a loss ("L" / "loss") and
    whose species differs from the species of the last event of its parent,
    a "Tb" event located in the species of the loss is inserted at the
    beginning of the node's chain of events.

    *modifies RT in place*

    NOTE(review): the original summary of this function was "adds species to
    the losses", and spTree is never read. The body looks adapted from the
    transferBack refinement; confirm that prepending a "Tb" event is really
    what is wanted for losses.

    Takes:
        - RT (ReconciledTree) : a reconciled tree obtained from an ale string
        - spTree (ete3.Tree) : a species tree (currently unused)
    """
    lossCodes = ("L", "loss")

    for n in RT.traverse():
        if n.is_root():
            continue

        firstEvent = n.getEvent(0)
        ## the original tested the undefined name `first` here, which raised
        ## a NameError on the first non-root node
        if firstEvent.eventCode not in lossCodes:
            continue

        ## loss!
        lastEventParent = n.up.getEvent(-1)
        if firstEvent.species == lastEventParent.species:
            continue  ## this is the "kept" child --> continue

        ## (a former check for a leading Tb/Bo event was dropped: it could
        ##  never be true for a node whose first event is a loss)
        TbEvent = RecEvent("Tb", species=firstEvent.species)
        n.addEvent(TbEvent, append=False)
def PrIMEAnnotationToRecEvent(annot):
    """
    Builds a RecEvent from the annotation of a PrIME node.

    The species is the part of the "name" entry located after its last "_";
    the event code is derived from the "VERTEXTYPE" entry.

    Takes:
        - annot (dict) : annotation of the node; the keys read are "name"
                         and "VERTEXTYPE"

    Returns:
        (RecEvent) : the corresponding event (leaf events additionally carry
                     the full name under "geneName")
        or
        None : when the name or the vertex type is missing or unknown
               (a message is printed in that case)
    """
    vertexTypeToCode = {
        "Duplication": "D",
        "Speciation": "S",  ## may be a SL. we check this later as this info is in the child
        "Transfer": "So",  ## we will have to add the transferBack later to the child
        "Loss": "L",
        "Leaf": "C",
    }

    name = annot.get("name", None)
    sp = None
    if name is not None:
        sp = name.rpartition("_")[2]

    evtType = None
    Vtype = annot.get("VERTEXTYPE", None)
    if Vtype is not None:
        evtType = vertexTypeToCode.get(Vtype)
        if evtType is None:
            ## single-argument print: same output under python 2 and 3
            print("UNKNOWN EVENT TYPE %s !" % (Vtype,))

    if (sp is None) or (evtType is None):
        print("error when trying to assign event. data: %s" % (annot,))
        return None

    addInfo = {}
    if evtType == "C":
        addInfo["geneName"] = name

    return RecEvent(evtType, sp, additionnalInfo=addInfo)
def NotungAnnotationToRecEvent(annot):
    """
    Builds a RecEvent from the annotation of a Notung node.

    Takes:
        - annot (dict) : annotation of the node; the keys read are
                         "S" (species), "D" (duplication flag),
                         "H" (transfer flag) and "name"

    Returns:
        (RecEvent) : the corresponding event
        or
        None : when no species is available (a message is printed)

    Raises:
        KeyError : when neither a duplication nor a transfer is flagged and
                   the annotation has no "name" entry
    """
    evtType = None
    sp = annot.get("S", None)

    ## `in` replaces dict.has_key, which no longer exists in python 3
    if "D" in annot and annot["D"] == "Y":  # duplication
        evtType = "D"

    if "H" in annot and annot["H"].startswith("Y"):  # transfer reception
        ## the destination species is already in the S field.
        ## typical line : 'Y@MOUSE@GORILLA' for a transfer from mouse to gorilla
        evtType = "Tb"

    if evtType is None:  # speciation, speciationOut or Loss or leaf!
        if annot["name"].endswith("*LOST"):
            evtType = "L"
        else:
            evtType = "S"  ## between leaf, S and So, always put S and we'll complement later

    if (sp is None) or (evtType is None):
        ## single-argument print: same output under python 2 and 3
        print("error when trying to assign event. data: %s" % (annot,))
        return None

    return RecEvent(evtType, sp)
def parse_eventsRec(self, element, obsoleteTagsBehaviour=1):
    """
    Reads the events listed under an "eventsRec" element.

    Each child of the element yields one RecEvent. The event code comes from
    the (corrected) tag of the child; the attributes "destinationSpecies" and
    "speciesLocation" give the species, "ts" gives the time slice (as int)
    and every other attribute is stored as additional information.

    Takes:
        - element (Element) : element with the "eventsRec" tag
        - obsoleteTagsBehaviour (int) [default = 1]: 0 : ignore
                                                     1 : warning
                                                     2 : throw exception

    Returns:
        (list) : list of RecEvent, in the order of the children

    Raises:
        Exception : if element does not have the "eventsRec" tag, or if an
                    obsolete tag is met while obsoleteTagsBehaviour > 1
    """
    TAG = "eventsRec"
    speciesTAGs = ["destinationSpecies", "speciesLocation"]
    tsTAG = "ts"

    if not self.isOfTag(element, TAG):
        raise Exception('BadTagException. The element is of tag ' + element.tag + " instead of " + TAG + "." )

    events = []

    ## list(element) is the documented replacement of Element.getchildren(),
    ## which is absent from recent versions of ElementTree
    for ch in list(element):
        evtCode = self.tagCorrection(ch.tag)

        if obsoleteTagsBehaviour > 0 and evtCode in OBSOLETE_EVENT_TAGS:
            print(OBSOLETEWARNINGTXT(evtCode))
            if obsoleteTagsBehaviour > 1:
                raise Exception("ERROR. obsolete tag " + evtCode + " encoutered")

        ## replace by special code when known tag, otherwise keep as is
        evtCode = REVERSE_EVENTTAGCORRESPONDANCE.get(evtCode, evtCode)

        species = None
        ts = None
        additionnalInfo = {}
        for k, v in ch.items():
            if k in speciesTAGs:
                species = v
            elif k == tsTAG:
                ts = int(v)
            else:
                additionnalInfo[k] = v

        events.append(RecEvent(evtCode, species, ts, additionnalInfo))

    return events
def addSpeciationAndLoss(node, keptSpeciesNode):
    """
    Turns node into a speciation with two children: a loss and a "kept"
    child that inherits the events and the children node had so far.

    *modifies node in place*

    After the call:
        - node carries a single "S" event located in the parent species of
          keptSpeciesNode, and the name the new kept child had at creation
        - the loss child is named "-" and carries a "loss" event in the
          sister species of keptSpeciesNode (empty species name when
          getSister returns None)
        - the kept child carries the former events of node (same order),
          the former name of node and its former children

    Takes:
        - node (ReconciledTree): node where a SpeciationLoss must take place
        - keptSpeciesNode (ete3.TreeNode): node of the species tree where the lineage survived
                                           (ie. the sister species of the one where the loss occured)
    """
    parentSpeciesNode = keptSpeciesNode.up
    lossSpeciesNode = getSister(keptSpeciesNode)

    ## the original computed this guarded name but then read
    ## lossSpeciesNode.name directly, failing when there is no sister
    lossName = ""
    if lossSpeciesNode is not None:
        lossName = lossSpeciesNode.name

    # 1. create the loss child
    lossNode = ReconciledTree()
    lossNode.addEvent(RecEvent("loss", lossName))
    lossNode.name = "-"

    # 2. create the kept child
    keptNode = ReconciledTree()

    ## transfering the events of node to keptNode.
    ## Events are appended: popping from the front while inserting at the
    ## front (append=False) would reverse the chain of events.
    while len(node.eventRecs) > 0:
        keptNode.addEvent(node.popEvent(0), append=True)

    ## the kept child takes over the name of node
    node.name, keptNode.name = keptNode.name, node.name

    # 3. link children to kept child
    while len(node.children) > 0:
        c = node.children[0]
        c.detach()
        keptNode.add_child(c)

    # 4. branching loss and kept to original node
    node.add_child(lossNode)
    node.add_child(keptNode)

    # 5. editing the event
    e = RecEvent("S", parentSpeciesNode.name)
    node.addEvent(e, append=False)  ##will insert the evt in first position

    return
def MakeLossIndependentNode(node, LossIndex, lostSpecies="", lostTS=None,
                            lostAdditional={}, keptChildNameSuffix=".c"):
    """
    Splits a *Loss event of node into an explicit loss child.

    *modifies node*

    After the call:
        - node keeps its events up to and including index LossIndex, and the
          code of the event at LossIndex has lost its last character
          (the "L" suffix of codes such as "SL")
        - a new child named "LOSS" carries a single "loss" event
        - a new child named node.name + keptChildNameSuffix carries the
          events that followed LossIndex (same order) and the former
          children of node

    Takes:
        - node (ReconciledTree): reconciled node where the *Loss event occurs
        - LossIndex (int): index of the speciationLoss or branchingOutLoss event
        - lostSpecies (str) [default = ""] : species of the loss
        - lostTS (int) [default = None]: timeSlice is the loss
        - lostAdditional [default = {}]: additional information to give to the new loss event
        - keptChildNameSuffix (str) [default = ".c"] : suffix to add to the name of the new child of node that is NOT a loss
    """
    ## give each loss event its own dict: the default value of
    ## lostAdditional is a single object shared by every call, and must
    ## not end up referenced (and possibly edited) by several events
    lossInfo = lostAdditional
    if lossInfo is not None:
        lossInfo = dict(lossInfo)

    # 1. create the loss child
    lossNode = ReconciledTree()
    lossNode.addEvent(RecEvent("loss", lostSpecies, ts=lostTS, additionnalInfo=lossInfo))
    lossNode.name = "LOSS"

    # 2. create the kept child
    keptNode = ReconciledTree()
    keptNode.name = node.name + keptChildNameSuffix

    ## every event located after the *Loss event goes to the kept child
    while len(node.eventRecs) > (LossIndex + 1):
        keptNode.addEvent(node.popEvent(LossIndex + 1))

    # 3. link children to kept child
    while len(node.children) > 0:
        c = node.children[0]
        c.detach()
        keptNode.add_child(c)

    # 4. branching loss and kept to original node
    node.add_child(lossNode)
    node.add_child(keptNode)

    # 5. editing the event
    ## drops the last character of the code
    ## NOTE(review): this presumes short codes ending in "L"; a long code
    ## such as "speciationLoss" would become "speciationLos" -- confirm
    e = node.eventRecs[LossIndex].eventCode
    node.eventRecs[LossIndex].eventCode = e[:-1]

    return
def NHXtreeToBasicRecTree(nhxTree, spTree=None):
    """
    *RECURSIVE*
    From a tree read in a NHX format file to a ReconciledTree without any intermediary events (SpeciationLoss events)

    Every node receives exactly one event: "C" for a leaf, "D" when its D
    feature equals "Y", "S" otherwise. All the features of the NHX node are
    copied onto the corresponding ReconciledTree node.

    Takes:
        - nhxTree (ete3.TreeNode) : tree read from a NHX formatted line
        - spTree (ete3.Tree or None) [default = None] : if different from None,
                                                        internal node's events associated to species whose name is in the species tree will be kept as such
                                                        if equal to None, only leaves get to keep their associated species
                                                        (and species of other events will have to be re-associated later)

    Returns:
        (ReconciledTree)
    """
    RT = ReconciledTree()

    eventCode = None
    species = None

    ## only terminal events in a DL context are considered here : leaf, speciation or duplication
    isLeaf = nhxTree.is_leaf()
    if isLeaf:
        eventCode = "C"
        ## we only get the species for the leaves
        ## (as they are the only one where we are sure the species is one that is present in the species tree)
        species = nhxTree.S
    elif nhxTree.D == "Y":
        eventCode = "D"
    else:
        eventCode = "S"

    ## internal nodes keep their species only when it is found exactly once
    ## in the species tree (leaves already have theirs).
    ## NOTE(review): applied to duplications as well as speciations, following
    ## the description of spTree above -- confirm.
    if (not isLeaf) and (spTree is not None):
        if len(spTree.search_nodes(name=nhxTree.S)) == 1:  ## not at all efficient ...
            species = nhxTree.S

    ##additional info:
    for f in nhxTree.features:
        RT.add_feature(f, getattr(nhxTree, f))

    evt = RecEvent(eventCode, species)
    RT.addEvent(evt)

    for c in nhxTree.children:
        ## spTree has to be handed down: without it the nodes below the root
        ## were converted as if no species tree had been given
        RT.add_child(NHXtreeToBasicRecTree(c, spTree))

    return RT
def refineReconciledTreeWithTransferBack(RT):
    """
    adds transferBack events where they were omitted

    A non-root node receives a leading "Tb" event (located in the species of
    its current first event) when the last event of its parent is a
    branchingOut / branchingOutLoss, its own first event is in a different
    species than that event, and it does not already start with a
    transferBack / bifurcationOut event.

    *modifies RT in place*; also prints a trace of the visited nodes
    (NOTE(review): these prints look like debugging output -- confirm they
    are still wanted).

    Takes:
        - RT (ReconciledTree) : a reconciled tree obtained from an ale string
    """
    outgoingCodes = ["branchingOut", "bro", "branchingOutLoss", "broL"]
    receptionCodes = ["Tb", "transferBack", "Bo", "bifurcationOut"]

    for n in RT.traverse():
        if n.is_root():
            continue

        lastEventParent = n.up.getEvent(-1)

        ## every print below takes a single argument so that the output is
        ## the same under python 2 and python 3
        print([x.eventCode + "," + x.species for x in n.eventRecs])

        if lastEventParent.eventCode not in outgoingCodes:
            continue
        ## parent is an outgoing transfer

        firstEvent = n.getEvent(0)

        print("%s %s -> %s %s" % (lastEventParent.eventCode,
                                  lastEventParent.species,
                                  firstEvent.eventCode,
                                  firstEvent.species))

        if firstEvent.species == lastEventParent.species:
            continue  ## this is the "kept" child --> continue

        if firstEvent.eventCode in receptionCodes:
            continue  ## already the correct annotation

        print(" creating event")
        TbEvent = RecEvent("Tb", species=firstEvent.species)
        n.addEvent(TbEvent, append=False)
def parse_node_annotation(node_annotation, isLeaf=False, isDead=False, isUndated=False):
    """
    Parses the reconciliation annotation of a node into a chain of events.

    The annotation is split on "." (a leading "." is ignored), then each part
    holding several "@" is split again so that every part describes a single
    event. Recognized parts:
        - only digits            -> "S"  (speciation)
        - starts with "T@"       -> "So" (transfer out)
        - starts with "@"        -> "Tb" (transfer reception)
        - starts with "Tb@"      -> "Bo" in species "-1"
        - starts with "D@"       -> "D"  (duplication)
    Any other part, including an empty one, is ignored.

    Every "So" or "S" event that is not the last of the chain gets an "L"
    appended to its code (the other lineage is lost).

    Takes:
        - node_annotation (str): reconciliation annotation on a node
        - isLeaf (bool) [default = False] : if True, a "C" event without
                    species is appended unless the chain already ends on one
        - isDead (bool) [default = False] : if True, a "Tb" event copying the
                    species and time slice of the first event is inserted at
                    the start of the chain unless it already starts with a
                    "Tb" or "Bo" event
        - isUndated (bool) [default = False] : if True, speciations,
                    transfers out, duplications and leaves get no time slice

    Returns:
        (list): list of RecEvent, in the order of the annotation

    Raises:
        ValueError : when a time slice is not an integer
        IndexError : when isDead is True and no event could be built
    """
    l_events = []

    if len(node_annotation) != 0:

        if node_annotation.startswith("."):
            node_annotation = node_annotation[1:]
        tmp_ann = node_annotation.split(".")

        ##further splitting multiple transfer
        s_ann = []
        for ann in tmp_ann:
            if ann.count("@") < 1:
                s_ann.append(ann)
                continue
            ## in case of transfer and loss like: T@27|SYNP2@26|ACAM1
            new_anns = ann.split("@")

            s_ann.append("@".join(new_anns[0:2]))  ##first tranfer, a transfer out

            for a in new_anns[2:]:  ##for each transfer after that (should be only one)
                s_ann.append("@" + a)

        print(node_annotation, "->", s_ann)

        for ann in s_ann:

            ## str.isdigit is False on an empty string: empty parts (as in
            ## the annotation ".") fall through every test below and are
            ## skipped instead of failing on ann[0]
            if ann.isdigit():  ##only numbers: spe or spe+loss
                target = ann
                ts = int(target)
                if isUndated:
                    ts = None
                l_events.append(RecEvent("S", target, ts))

            elif ann.startswith("T@"):  ##Transfer out
                ## time slice of the source
                source_ts = None
                source_sp = None

                if isUndated:
                    ## of the shape : "T@D->A"
                    source_sp = ann[2:].partition("->")[0]
                else:
                    ## partitionning something like T@22|22
                    source_ts, junk, source_sp = ann[2:].partition("|")
                    source_ts = int(source_ts)

                ##adding the event
                l_events.append(RecEvent("So", source_sp, source_ts))

            elif ann.startswith("@"):  ##transfer in or back
                ## partitionning something like @22|22 to get the time slice and specie
                target_ts, junk, target_sp = ann[1:].partition("|")

                ##adding the event
                l_events.append(RecEvent("Tb", target_sp, int(target_ts)))

            elif ann.startswith("Tb@"):
                l_events.append(RecEvent("Bo", "-1"))

            elif ann.startswith("D@"):  ##Duplication
                ts = None
                sp = None
                if isUndated:
                    sp = ann[2:]
                else:
                    ## partitionning something like D@23|22
                    ts, junk, sp = ann[2:].partition("|")
                    ts = int(ts)

                l_events.append(RecEvent("D", sp, ts))

    if isLeaf and (len(l_events) == 0 or l_events[-1].eventCode != "C"):
        ts = 0
        if isUndated:
            ts = None
        l_events.append(RecEvent("C", None, ts))  ##temporary placeholder for the leaf species

    if isDead:
        ## we start in the dead so the first event MUST be Bout or Tloss
        if not l_events[0].eventCode in ["Tb", "Bo"]:
            target_ts = l_events[0].timeSlice
            target_sp = l_events[0].species
            l_events.insert(0, RecEvent("Tb", target_sp, target_ts))

    ##adding loss labels
    for evt in l_events[:-1]:  ##all events but the last one
        if evt.eventCode in ["So", "S"]:
            evt.eventCode += "L"

    return l_events
def addTbEvent(node):
    """
    Inserts a transferBack ("Tb") event at the start of the chain of events
    of node, located in the species of the event that was first until then.

    Takes:
        - node : node exposing getEvents() and addEvent()
                 (presumably a ReconciledTree -- confirm)
    """
    formerFirst = node.getEvents()[0]
    ## append=False puts the new event at the start of the chain
    node.addEvent(RecEvent("Tb", formerFirst.species), append=False)
def AddLeafEvent(node):
    """
    Adds a leaf ("C") event to node, located in the species of the event
    that was last until then; the name of node is then recorded as
    "geneName" in the additional information of the last event of node.

    Takes:
        - node : node exposing getEvents(), addEvent() and a name
                 (presumably a ReconciledTree -- confirm)
    """
    lastSpecies = node.getEvents()[-1].species
    leafEvent = RecEvent("C", lastSpecies)
    node.addEvent(leafEvent)

    ## the event is fetched back from the node, as the original code did
    geneName = node.name
    node.getEvents()[-1].additionnalInfo["geneName"] = geneName