def rdf2dendropyTree(file_obj=None, data=None):
    '''Parses RDF content into a rooted dendropy.Tree.

    The graph is read from `file_obj` (an open file object) when one is
    given; otherwise the string `data` is parsed as RDF/XML.

    Each triple whose predicate is OBO[HAS_PARENT_PREDICATE] contributes one
    child -> parent edge. A node whose resource has a value for
    OBO[REPRESENTS_TU_PREDICATE] is given a Taxon labelled with that value's
    rdfs:label; any other node is created without a taxon.

    Relies on rdflib and dendropy.

    Returns the Tree seeded at the single node that was never seen as a
    child, or None when no node at all is left without a parent (for
    instance when the graph holds no has_Parent triple).
    Raises ValueError if more than one node is left without a parent.
    '''
    from dendropy import Node, Tree, TaxonSet, Taxon

    graph = rdflib.Graph()
    if file_obj:
        graph.parse(file=file_obj)
    else:
        graph.parse(data=data, format='xml')

    OBO = rdflib.Namespace(u"http://purl.obolibrary.org/obo/")
    has_parent = OBO[HAS_PARENT_PREDICATE]
    represents_tu = OBO[REPRESENTS_TU_PREDICATE]
    taxon_set = TaxonSet()
    # Keyed by the RDF term itself rather than by id(term): two equal terms
    # are not guaranteed to be the same Python object, so an id()-based key
    # can create two Nodes for one graph node and yield spurious roots.
    nd_dict = {}
    parentless = set()

    def _get_or_create(term):
        '''Returns (node, created) for `term`, building the Node on first use.'''
        node = nd_dict.get(term)
        if node is not None:
            return node, False
        tu = rdflib.resource.Resource(graph, term).value(represents_tu)
        if tu:
            taxon = Taxon(label=tu.value(rdflib.RDFS.label))
            taxon_set.append(taxon)
            node = Node(taxon=taxon)
        else:
            node = Node()
        nd_dict[term] = node
        return node, True

    out = open('parse_rdf.txt', 'w') if _DEBUGGING else None
    try:
        for s, p, o in graph.triples((None, has_parent, None)):
            # The parent is resolved first so taxa are appended in the same
            # order as before (object's taxon, then subject's taxon).
            parent, created = _get_or_create(o)
            if created:
                # Root candidate until it shows up as somebody's child.
                parentless.add(parent)
            child, created = _get_or_create(s)
            if not created:
                parentless.discard(child)
            parent.add_child(child)
            if out is not None:
                out.write('%s %s %s\n' % (s, p, o))
                out.write('%s\n' % (str(parentless)))
    finally:
        # Close the debug file even when parsing fails part-way through.
        if out is not None:
            out.close()

    if len(parentless) != 1:
        if not parentless:
            # Historical behaviour, kept for callers that test for None.
            return None
        # A cutoff could be applied here to suppress really long listings.
        message = ("Expecting to find exactly 1 Node (an object of a "
                   "has_Parent triple) in the graph without a parent. "
                   "Found %d" % len(parentless))
        message += ":\n "
        message += ''.join(
            "\n " + (nd.label if nd.label else "<unlabeled>" + str(id(nd)))
            for nd in parentless)
        raise ValueError(message)

    tree = Tree(taxon_set=taxon_set)
    tree.seed_node = list(parentless)[0]
    tree.is_rooted = True
    return tree
def createTreeFile(logFilname,quick,nRobots,gTime,cutDeadLeaf):
    """Parse the genome-transfer log and return the list of genealogy trees.

    NOTE(review): the nesting below was reconstructed from a copy of this
    function whose line structure had been lost. Confirm the indentation
    against the original file, in particular that the final
    `else : newAllPhy = allPhy` pairs with the `quick` test.

    The log is read from `options.logFile`; the `logFilname` argument is not
    used in the body. Only lines matching '<n> : <son> take <father>' are
    processed, and the generation of such a line is n + 1. Parsing stops at
    the first line whose generation exceeds `options.end`, and lines before
    generation `options.begin` are skipped.

    For every processed line, each tree in `allPhy` is searched for a node
    labelled '<generation - 1> <father>'; when found, a child labelled
    '<generation> <son>' with edge length 1 is attached to it.

    Parameters:
        logFilname  -- not used; the path comes from options.logFile.
        quick       -- when true, an ancestor table is filled during parsing
                       and written to ./ancestorsRawData/<baseName>.csv, and
                       trees are re-rooted on the ancestor found `step`
                       generations above each new child.
        nRobots     -- number of columns of the ancestor table; also passed
                       to initAllPhy.
        gTime       -- not used (the division by it is commented out).
        cutDeadLeaf -- when true, on the first line of each new generation
                       old leaves are removed from every tree.

    Returns the list bound to `allPhy` when parsing ends.

    Depends on names defined outside this function: options, re, stdout,
    initAllPhy, getGen, addTree, Tree and baseName.
    """
    print "############################"
    print "## Parsing of the logFile ##"
    fileContentArray = open(options.logFile,'r').readlines()
    currentTimeStamp = -1 # Used to detect a change of timestamp
    precTimeStamp = -1 # Used to detect a change of timestamp
    removeDone = False
    beg= int(options.begin)
    nGen = int(options.end)
    step = int(options.step)
    # One row per generation (0..nGen), one column per robot; -1 = not set.
    if(quick):table=[[-1 for i in range(nRobots)] for i in range(nGen + 1)]
    allPhy = []
    newAllPhy=allPhy
    print "G",
    for line in fileContentArray:
        # A line is valid only if it contains an iteration number followed by
        # ' : <son> take <father>'
        #timeStampDescription = 'Info\(([0-9]+)\) : robot nb.([0-9]+) take the genome from the robot nb.([0-9]+)'
        timeStampDescription = '([0-9]+) : ([0-9]+) take ([0-9]+)'
        timeStampEvaluation = re.compile(timeStampDescription)
        match = timeStampEvaluation.search(line)
        if ( match ):
            # Mechanism to detect a change of timestamp (there may be many
            # lines with the same timestamp)
            timeStamp = (int(match.group(1))+1)#/gTime
            if (timeStamp > int(options.end)):
                break
            # On the very first matching line both values become equal, so
            # that line never counts as a new generation.
            if ( currentTimeStamp == -1):
                precTimeStamp = timeStamp
            else:
                precTimeStamp = currentTimeStamp
            currentTimeStamp = timeStamp
            newGeneration = not (precTimeStamp == currentTimeStamp)
            # Keep reading the file until the first wanted generation is reached
            if (timeStamp >= beg) :
                if(timeStamp == beg and allPhy == []) :
                    allPhy = initAllPhy(beg-1,nRobots)
                if ( newGeneration ):
                    print "-"+str(timeStamp),
                    stdout.flush()
                    allRoot=[]
                    # The trees collected during the previous generation
                    # become the working set; start a fresh collection.
                    allPhy=newAllPhy
                    newAllPhy = []
                father = int(match.group(3))
                son = int(match.group(2))
                ################## tree dendropy creation
                # Node labels have the form '<generation> <robot number>'.
                sonId = str(timeStamp) + ' ' + str(son)
                fatherId = str(timeStamp - 1) + ' ' + str(father)
                for tree in allPhy :
                    if(cutDeadLeaf):
                        if(newGeneration):
                            leaves= tree.leaf_nodes()
                            for l in leaves:
                                # Remove leaves whose level is below the
                                # previous generation, never going above
                                # level 1.
                                if ( (l.level() < (timeStamp-1)) and (l.level() > 1)):
                                    p= l.parent_node
                                    p.remove_child(l)
                                    # Also remove ancestors that became
                                    # leaves through the removal.
                                    while(p.is_leaf() and p.level() > 1) :
                                        f= p.parent_node
                                        f.remove_child(p)
                                        p=f
                    n = None
                    ancestor = None #ancestor at the level N-S
                    allNodes = tree.nodes()
                    # Why look at all nodes and not only at the leaves? Not
                    # sure, but the dendropy function that returns the leaves
                    # is faster than looking for leaves manually. If you
                    # choose to look at leaves only, do not forget that they
                    # change during a generation.
                    for node in allNodes :
                        if(node.label == fatherId) :
                            n=node
                    if(n is not None):
                        # From here on `son` is the new tree node, no longer
                        # the robot number.
                        son=n.new_child(label=sonId)
                        son.edge_length = 1
                        sonI= son.label.split(' ')[1]
                        gene = son.label.split(' ')[0]
                        if((int(timeStamp) > beg + int(step) -1) and quick ):# used to cut everything that is no longer useful
                            # Climb until the generation gap reported by
                            # getGen equals `step`.
                            ancestor=son.parent_node
                            while( (int(getGen(son))-int(getGen(ancestor))) != int(step ) ):
                                ancestor = ancestor.parent_node
                            ancestorId = ancestor.label.split(' ')[1]
                            # Create a new tree using the ancestor as root
                            new_tree = Tree(tree)
                            mrca_node = new_tree.find_node_with_label(ancestor.label)
                            new_tree.seed_node=mrca_node
                            new_tree.seed_node.parent_node = None
                            addTree(new_tree,newAllPhy)
                            table[int(timeStamp)][int(sonI)]=ancestorId
                        else :
                            newAllPhy = allPhy
    print "# Parsing done. #"
    print "############################"
    if(quick):
        s=""
        allFather=0
        for i in table:
            for j in i :
                s+= str(j)+","
            # Distinct ancestor ids of this row, without the -1 placeholder.
            allFather=list(set(i))
            try :
                allFather.remove(-1)
            except ValueError: # reaching here means the row has no -1, i.e. all genomes were transmitted
                None
            allAlive=list(i)
            allAlive = filter (lambda a: a != -1, allAlive)
            s+=str(len(allFather))+","+str(len(allAlive)) # The number of robots that transmitted their genome is appended at the end of the row
            s+="\n"
        csvFilname="./ancestorsRawData/"+baseName+".csv"
        print "#\t write the csv file in "+csvFilname
        out=open(csvFilname,"w" )
        out.write(s)
        out.close()
        print "#\tdone"
        print "#------------------------#"
    return allPhy
def rdf2dendropyTree(file_obj=None, data=None):
    '''Parses RDF content into a rooted dendropy.Tree.

    The graph is read from `file_obj` (an open file object) when one is
    given; otherwise the string `data` is parsed as RDF/XML.

    Each triple whose predicate is OBO[HAS_PARENT_PREDICATE] contributes one
    child -> parent edge. A node whose resource has a value for
    OBO[REPRESENTS_TU_PREDICATE] is given a Taxon labelled with that value's
    rdfs:label; any other node is created without a taxon.

    Relies on rdflib and dendropy.

    Returns the Tree seeded at the single node that was never seen as a
    child, or None when no node at all is left without a parent (for
    instance when the graph holds no has_Parent triple).
    Raises ValueError if more than one node is left without a parent.
    '''
    from dendropy import Node, Tree, TaxonSet, Taxon

    graph = rdflib.Graph()
    if file_obj:
        graph.parse(file=file_obj)
    else:
        graph.parse(data=data, format='xml')

    OBO = rdflib.Namespace(u"http://purl.obolibrary.org/obo/")
    has_parent = OBO[HAS_PARENT_PREDICATE]
    represents_tu = OBO[REPRESENTS_TU_PREDICATE]
    taxon_set = TaxonSet()
    # Keyed by the RDF term itself rather than by id(term): two equal terms
    # are not guaranteed to be the same Python object, so an id()-based key
    # can create two Nodes for one graph node and yield spurious roots.
    nd_dict = {}
    parentless = set()

    def _get_or_create(term):
        '''Returns (node, created) for `term`, building the Node on first use.'''
        node = nd_dict.get(term)
        if node is not None:
            return node, False
        tu = rdflib.resource.Resource(graph, term).value(represents_tu)
        if tu:
            taxon = Taxon(label=tu.value(rdflib.RDFS.label))
            taxon_set.append(taxon)
            node = Node(taxon=taxon)
        else:
            node = Node()
        nd_dict[term] = node
        return node, True

    out = open('parse_rdf.txt', 'w') if _DEBUGGING else None
    try:
        for s, p, o in graph.triples((None, has_parent, None)):
            # The parent is resolved first so taxa are appended in the same
            # order as before (object's taxon, then subject's taxon).
            parent, created = _get_or_create(o)
            if created:
                # Root candidate until it shows up as somebody's child.
                parentless.add(parent)
            child, created = _get_or_create(s)
            if not created:
                parentless.discard(child)
            parent.add_child(child)
            if out is not None:
                out.write('%s %s %s\n' % (s, p, o))
                out.write('%s\n' % (str(parentless)))
    finally:
        # Close the debug file even when parsing fails part-way through.
        if out is not None:
            out.close()

    if len(parentless) != 1:
        if not parentless:
            # Historical behaviour, kept for callers that test for None.
            return None
        # A cutoff could be applied here to suppress really long listings.
        message = ("Expecting to find exactly 1 Node (an object of a "
                   "has_Parent triple) in the graph without a parent. "
                   "Found %d" % len(parentless))
        message += ":\n "
        message += ''.join(
            "\n " + (nd.label if nd.label else "<unlabeled>" + str(id(nd)))
            for nd in parentless)
        raise ValueError(message)

    tree = Tree(taxon_set=taxon_set)
    tree.seed_node = list(parentless)[0]
    tree.is_rooted = True
    return tree
def rdf2dendropyTree(filepath):
    """Parse the RDF document at `filepath` into a rooted dendropy.Tree.

    Each triple whose predicate is OBO[HAS_PARENT_PREDICATE] contributes one
    child -> parent edge. A node whose resource has a value for
    OBO[REPRESENTS_TU_PREDICATE] is given a Taxon labelled with that value's
    rdfs:label; any other node is created without a taxon.

    Relies on rdflib and dendropy.

    Returns the Tree seeded at the single node that was never seen as a
    child.
    Raises ValueError if more than one node is left without a parent.
    Calls sys.exit("no parentless") when no node is left without a parent
    (for instance when the graph holds no has_Parent triple).
    """
    from rdflib.graph import Graph
    from dendropy import Node, Tree, TaxonSet, Taxon

    graph = Graph()
    graph.parse(filepath)

    OBO = Namespace(u"http://purl.obolibrary.org/obo/")
    has_parent = OBO[HAS_PARENT_PREDICATE]
    represents_tu = OBO[REPRESENTS_TU_PREDICATE]
    taxon_set = TaxonSet()
    # Keyed by the RDF term itself rather than by id(term): two equal terms
    # are not guaranteed to be the same Python object, so an id()-based key
    # can create two Nodes for one graph node and yield spurious roots.
    nd_dict = {}
    parentless = set()

    def _get_or_create(term):
        """Return (node, created) for `term`, building the Node on first use."""
        node = nd_dict.get(term)
        if node is not None:
            return node, False
        tu = rdflib.resource.Resource(graph, term).value(represents_tu)
        if tu:
            taxon = Taxon(label=tu.value(rdflib.RDFS.label))
            taxon_set.append(taxon)
            node = Node(taxon=taxon)
        else:
            node = Node()
        nd_dict[term] = node
        return node, True

    out = open("parse_rdf.txt", "w") if _DEBUGGING else None
    try:
        for s, p, o in graph.triples((None, has_parent, None)):
            # The parent is resolved first so taxa are appended in the same
            # order as before (object's taxon, then subject's taxon).
            parent, created = _get_or_create(o)
            if created:
                # Root candidate until it shows up as somebody's child.
                parentless.add(parent)
            child, created = _get_or_create(s)
            if not created:
                parentless.discard(child)
            parent.add_child(child)
            if out is not None:
                out.write("%s %s %s\n" % (s, p, o))
                out.write("%s\n" % (str(parentless)))
    finally:
        # Close the debug file even when parsing fails part-way through.
        if out is not None:
            out.close()

    if len(parentless) != 1:
        if not parentless:
            # Historical behaviour of this variant: terminate the program.
            sys.exit("no parentless")
        # A cutoff could be applied here to suppress really long listings.
        message = (
            "Expecting to find exactly 1 Node (an object of a has_Parent "
            "triple) in the graph without a parent. "
            "Found %d" % len(parentless)
        )
        message += ":\n "
        message += "".join(
            "\n " + (nd.label if nd.label else "<unlabeled>" + str(id(nd)))
            for nd in parentless
        )
        raise ValueError(message)

    tree = Tree(taxon_set=taxon_set)
    tree.seed_node = list(parentless)[0]
    tree.is_rooted = True
    return tree