def rdf2dendropyTree(file_obj=None, data=None):
    '''
    Parses RDF content into a rooted dendropy.Tree.

    The graph is read from `file_obj` (a file object) when one is given;
    otherwise `data` is parsed as RDF/XML.

    Every triple whose predicate is OBO[HAS_PARENT_PREDICATE] (the
    'has_Parent' term) is read as (child, has_Parent, parent).  One
    dendropy Node is created per distinct RDF term; a term that has an
    OBO[REPRESENTS_TU_PREDICATE] value gets a Taxon labelled with that
    value's rdfs:label.

    Relies on rdflib and dendropy.

    Returns the Tree, or None when no parentless node was found (for example
    when the graph holds no has_Parent triples).
    Raises ValueError if more than one node is left without a parent.

    When the module-level _DEBUGGING flag is true, a trace of the processed
    triples is written to 'parse_rdf.txt' in the current directory.
    '''

    from dendropy import Node, Tree, TaxonSet, Taxon
    graph = rdflib.Graph()
    if file_obj:
        graph.parse(file=file_obj)
    else:
        graph.parse(data=data, format='xml')

    OBO = rdflib.Namespace(u"http://purl.obolibrary.org/obo/")
    taxon_set = TaxonSet()
    # Keyed by the rdflib term itself.  Terms hash and compare by value, so
    # the same URI/BNode maps to the same Node even when rdflib hands back
    # distinct Python objects for it (keying on id() would not guarantee that).
    nd_dict = {}
    parentless = set()

    def node_for(term):
        '''Returns (node, created) for `term`, building the Node on first sight.'''
        node = nd_dict.get(term)
        if node is not None:
            return node, False
        tu = rdflib.resource.Resource(graph, term).value(OBO[REPRESENTS_TU_PREDICATE])
        if tu:
            taxon = Taxon(label=tu.value(rdflib.RDFS.label))
            taxon_set.append(taxon)
            node = Node(taxon=taxon)
        else:
            node = Node()
        nd_dict[term] = node
        return node, True

    debug_out = open('parse_rdf.txt', 'w') if _DEBUGGING else None
    try:
        for s, p, o in graph.triples((None, OBO[HAS_PARENT_PREDICATE], None)):
            # Parent first, then child, so taxa are appended in the same
            # order as the triples mention them.
            parent, parent_created = node_for(o)
            if parent_created:
                # A node first seen as a parent is a root candidate until it
                # shows up as somebody's child.
                parentless.add(parent)
            child, child_created = node_for(s)
            if not child_created:
                parentless.discard(child)
            parent.add_child(child)

            if debug_out is not None:
                debug_out.write('%s %s %s\n' % (str(s), p, o))
                debug_out.write('%s\n' % (str(parentless)))
    finally:
        # Close the trace file even if building the tree fails part-way.
        if debug_out is not None:
            debug_out.close()

    if not parentless:
        return None
    if len(parentless) > 1:
        message = "Expecting to find exactly 1 Node (an object of a has_Parent triple) in the graph without a parent. Found %d" % len(parentless)
        message += ":\n  "
        for nd in parentless:
            # Nodes built here carry their name on the taxon, not on the
            # node, so fall back to the taxon label before giving up.
            label = nd.label
            if not label and getattr(nd, 'taxon', None) is not None:
                label = nd.taxon.label
            if label:
                message += "\n  %s" % label
            else:
                message += "\n  <unlabeled>" + str(id(nd))
        raise ValueError(message)

    tree = Tree(taxon_set=taxon_set)
    tree.seed_node = next(iter(parentless))
    tree.is_rooted = True
    return tree
Exemple #2
0
def createTreeFile(logFilname,quick,nRobots,gTime,cutDeadLeaf):
    '''
    Replays a genome-transfer log and grows the list of phylogeny trees it describes.

    Every line of `options.logFile` that matches "<n> : <son> take <father>"
    is processed in file order, with n + 1 used as the generation (timestamp).
    Lines whose timestamp is below `options.begin` are skipped and parsing
    stops at the first timestamp above `options.end`.  For each retained line,
    every tree in the current list is searched for a node labelled
    "<timestamp - 1> <father>"; when found, a child labelled
    "<timestamp> <son>" with edge length 1 is attached to it.

    Parameters:
      logFilname  -- NOTE(review): not read by the code below; the log path
                     actually comes from the module-level `options.logFile`.
      quick       -- when true, a table of (options.end + 1) rows by nRobots
                     columns (initialised to -1) records, per new node, the id
                     of its ancestor `options.step` generations up; trees are
                     copied and re-rooted at that ancestor, and the table is
                     written to "./ancestorsRawData/" + baseName + ".csv".
      nRobots     -- number of columns of that table; also passed to initAllPhy.
      gTime       -- NOTE(review): only appears in a commented-out division.
      cutDeadLeaf -- when true, at each new generation leaves whose level() is
                     below timestamp - 1 (and above 1) are removed, together
                     with ancestors that become leaves as a result.

    Returns the list `allPhy` as it stands when parsing ends.

    Depends on names defined outside this function: options, re, stdout,
    baseName, Tree, initAllPhy, addTree and getGen.
    '''
    print "############################"
    print "## Parsing of the logFile ##"
    
    fileContentArray = open(options.logFile,'r').readlines()
    currentTimeStamp = -1 # Timestamp of the line being processed (-1 until the first match)
    precTimeStamp = -1 # Timestamp of the previously matched line, used to detect a change

    removeDone = False
    beg= int(options.begin)	
    nGen = int(options.end)	
    step = int(options.step)

    # `table` only exists in quick mode; it is only read/written under `quick` below.
    if(quick):table=[[-1 for i in range(nRobots)] for i in range(nGen + 1)]
	
   
    allPhy = []
    newAllPhy=allPhy
    print "G", 
    for line in fileContentArray:
		# A line is valid only if it contains "<iteration> : <son> take <father>"
		#timeStampDescription = 'Info\(([0-9]+)\) : robot nb.([0-9]+) take the genome from the robot nb.([0-9]+)' 
		timeStampDescription = '([0-9]+) : ([0-9]+) take ([0-9]+)'
		timeStampEvaluation = re.compile(timeStampDescription)
		match = timeStampEvaluation.search(line)
		if ( match ):
			# Detect a change of timestamp (many consecutive lines may share the same timestamp)
			timeStamp = (int(match.group(1))+1)#/gTime
			if (timeStamp > int(options.end)):
				break
			if ( currentTimeStamp == -1):
				precTimeStamp = timeStamp
			else:
				precTimeStamp = currentTimeStamp
			currentTimeStamp = timeStamp
			newGeneration = not (precTimeStamp == currentTimeStamp)
			# Keep skipping lines until the first wanted generation is reached
			if (timeStamp >= beg) :
			    if(timeStamp == beg and allPhy == []) : allPhy = initAllPhy(beg-1,nRobots)
			    if ( newGeneration ):
					print "-"+str(timeStamp),
  	                   	        stdout.flush()

					allRoot=[]
					allPhy=newAllPhy
					newAllPhy = []
			    father = int(match.group(3))
			    son = int(match.group(2))
    ################## tree dendropy  creation
			    # Node labels are "<generation> <robot id>"
			    sonId = str(timeStamp) + ' ' + str(son)
			    fatherId = str(timeStamp - 1) + ' ' + str(father)
			    for tree in allPhy :
				    if(cutDeadLeaf):
				     	if(newGeneration):
					    leaves= tree.leaf_nodes()
					    for l in leaves:
					    	if ( (l.level() < (timeStamp-1)) and (l.level() > 1)):
							# Remove the stale leaf, then climb and remove each ancestor that became a leaf
							p= l.parent_node
	                                                p.remove_child(l)
	                                                while(p.is_leaf() and p.level() > 1) :
	                                                    f= p.parent_node
	                                                    f.remove_child(p)
	                                                    p=f
				    
				    n = None
				    ancestor = None # ancestor `step` generations above the new node
				    allNodes = tree.nodes()
				   # Why scan all nodes rather than leaves only? (Original author's note, unverified:) the dendropy call returning every node was found faster than collecting leaves by hand, and the set of leaves changes during a generation.
				    for node in allNodes :
					if(node.label == fatherId) :
					    n=node
				    if(n is not None):
					# NOTE: `son` is rebound here from the robot id (int) to the new child node
					son=n.new_child(label=sonId)
					son.edge_length = 1
					sonI= son.label.split(' ')[1] 
					gene = son.label.split(' ')[0]
					if((int(timeStamp) > beg + int(step) -1) and quick ):# prune everything that is no longer useful
					    
					    ancestor=son.parent_node
					    # Climb until the generation gap between son and ancestor equals `step`
					    while( (int(getGen(son))-int(getGen(ancestor))) != int(step ) ):
						ancestor = ancestor.parent_node
					    ancestorId = ancestor.label.split(' ')[1]
						# Create a new tree using the ancestor as root
					    new_tree = Tree(tree)
					    mrca_node = new_tree.find_node_with_label(ancestor.label)
					    new_tree.seed_node=mrca_node
					    new_tree.seed_node.parent_node = None
					    addTree(new_tree,newAllPhy)
					    table[int(timeStamp)][int(sonI)]=ancestorId
				        # NOTE(review): tabs and spaces are mixed here; with 8-column tabs this `else` pairs with the `quick` test above -- confirm
				        else :
				            newAllPhy = allPhy
    print "#       Parsing done.      #"
    print "############################"

    if(quick):
        # Serialise the table: one CSV row per generation
        s=""
        allFather=0
        for i in table:
    	    for j in i :
    			s+=  str(j)+","
    
    	    allFather=list(set(i))
    	    try :
    		allFather.remove(-1)
    	    except ValueError:
		# Reaching here means the row holds no -1, i.e. every slot was filled
		None
    	    allAlive=list(i)
    	    allAlive = filter (lambda a: a != -1, allAlive)
    	    s+=str(len(allFather))+","+str(len(allAlive))    # Append the number of distinct recorded ancestors and the number of filled slots to the row
    	    s+="\n"
        
        csvFilname="./ancestorsRawData/"+baseName+".csv" 
        print "#\t write the csv file in "+csvFilname	
        out=open(csvFilname,"w" )	
        out.write(s)
        out.close()
        print "#\tdone"
        print "#------------------------#"

    return allPhy
Exemple #3
0
def rdf2dendropyTree(file_obj=None, data=None):
    '''
    Builds a rooted dendropy.Tree from RDF content.

    `file_obj` (a file object) is parsed when it is given; otherwise `data`
    is parsed as RDF/XML.

    Each triple with predicate OBO[HAS_PARENT_PREDICATE] (the 'has_Parent'
    term) is interpreted as (child, has_Parent, parent).  Exactly one
    dendropy Node is made per distinct RDF term, and a term with an
    OBO[REPRESENTS_TU_PREDICATE] value receives a Taxon whose label is that
    value's rdfs:label.

    Relies on rdflib and dendropy.

    Returns the Tree, or None if no parentless node exists (e.g. the graph
    has no has_Parent triples).
    Raises ValueError if the graph implies more than one root node.

    If the module-level _DEBUGGING flag is true, the processed triples are
    traced to 'parse_rdf.txt' in the current directory.
    '''

    from dendropy import Node, Tree, TaxonSet, Taxon
    graph = rdflib.Graph()
    if file_obj:
        graph.parse(file=file_obj)
    else:
        graph.parse(data=data, format='xml')

    OBO = rdflib.Namespace(u"http://purl.obolibrary.org/obo/")
    has_parent = OBO[HAS_PARENT_PREDICATE]
    represents_tu = OBO[REPRESENTS_TU_PREDICATE]
    taxon_set = TaxonSet()
    # Term -> Node.  rdflib terms are hashable and compare by value, which
    # makes them reliable keys; id(term) is not, because equal terms may be
    # separate Python objects.
    nd_dict = {}
    parentless = set()

    def lookup_or_create(term):
        '''Returns (node, was_created) for the given rdflib term.'''
        existing = nd_dict.get(term)
        if existing is not None:
            return existing, False
        tu = rdflib.resource.Resource(graph, term).value(represents_tu)
        if tu:
            taxon = Taxon(label=tu.value(rdflib.RDFS.label))
            taxon_set.append(taxon)
            created = Node(taxon=taxon)
        else:
            created = Node()
        nd_dict[term] = created
        return created, True

    trace = open('parse_rdf.txt', 'w') if _DEBUGGING else None
    try:
        for s, p, o in graph.triples((None, has_parent, None)):
            parent, is_new_parent = lookup_or_create(o)
            if is_new_parent:
                # Root candidate until it is seen as the subject of a triple.
                parentless.add(parent)
            child, is_new_child = lookup_or_create(s)
            if not is_new_child:
                parentless.discard(child)
            parent.add_child(child)

            if trace is not None:
                trace.write('%s %s %s\n' % (str(s), p, o))
                trace.write('%s\n' % (str(parentless)))
    finally:
        # Never leave the trace file open, even when parsing blows up.
        if trace is not None:
            trace.close()

    if not parentless:
        return None
    if len(parentless) > 1:
        message = "Expecting to find exactly 1 Node (an object of a has_Parent triple) in the graph without a parent. Found %d" % len(
            parentless)
        message += ":\n  "
        for nd in parentless:
            # The name lives on the taxon for nodes created in this function.
            label = nd.label
            if not label and getattr(nd, 'taxon', None) is not None:
                label = nd.taxon.label
            if label:
                message += "\n  %s" % label
            else:
                message += "\n  <unlabeled>" + str(id(nd))
        raise ValueError(message)

    tree = Tree(taxon_set=taxon_set)
    tree.seed_node = next(iter(parentless))
    tree.is_rooted = True
    return tree
def rdf2dendropyTree(filepath):
    """
    Parses the RDF document at `filepath` into a rooted dendropy.Tree.

    Each triple with predicate OBO[HAS_PARENT_PREDICATE] is read as
    (child, has_Parent, parent).  One dendropy Node is created per distinct
    RDF term; a term that has an OBO[REPRESENTS_TU_PREDICATE] value gets a
    Taxon labelled with that value's rdfs:label.

    Relies on rdflib and dendropy.

    Returns the Tree when exactly one parentless node is found.
    Raises ValueError if more than one node is left without a parent.
    Calls sys.exit("no parentless") when no parentless node is found.

    When the module-level _DEBUGGING flag is true, a trace of the processed
    triples is written to "parse_rdf.txt" in the current directory.
    """
    from rdflib.graph import Graph
    from dendropy import Node, Tree, TaxonSet, Taxon

    graph = Graph()
    graph.parse(filepath)

    OBO = Namespace(u"http://purl.obolibrary.org/obo/")
    taxon_set = TaxonSet()
    # Keyed by the rdflib term itself: terms hash/compare by value, so equal
    # terms share one Node even when they are distinct Python objects
    # (id()-based keys gave no such guarantee).
    nd_dict = {}
    parentless = set()

    def node_for(term):
        """Returns (node, created) for `term`, building the Node on first sight."""
        node = nd_dict.get(term)
        if node is not None:
            return node, False
        tu = rdflib.resource.Resource(graph, term).value(OBO[REPRESENTS_TU_PREDICATE])
        if tu:
            taxon = Taxon(label=tu.value(rdflib.RDFS.label))
            taxon_set.append(taxon)
            node = Node(taxon=taxon)
        else:
            node = Node()
        nd_dict[term] = node
        return node, True

    debug_out = open("parse_rdf.txt", "w") if _DEBUGGING else None
    try:
        for s, p, o in graph.triples((None, OBO[HAS_PARENT_PREDICATE], None)):
            parent, parent_created = node_for(o)
            if parent_created:
                # Stays a root candidate until it appears as a child.
                parentless.add(parent)
            child, child_created = node_for(s)
            if not child_created:
                parentless.discard(child)
            parent.add_child(child)

            if debug_out is not None:
                debug_out.write("%s %s %s\n" % (str(s), p, o))
                debug_out.write("%s\n" % (str(parentless)))
    finally:
        # Close the trace file even if building the tree fails part-way.
        if debug_out is not None:
            debug_out.close()

    if not parentless:
        sys.exit("no parentless")
    if len(parentless) > 1:
        message = (
            "Expecting to find exactly 1 Node (an object of a has_Parent triple) in the graph without a parent. Found %d"
            % len(parentless)
        )
        message += ":\n  "
        for nd in parentless:
            # Nodes built here carry their name on the taxon, not the node.
            label = nd.label
            if not label and getattr(nd, "taxon", None) is not None:
                label = nd.taxon.label
            if label:
                message += "\n  %s" % label
            else:
                message += "\n  <unlabeled>" + str(id(nd))
        raise ValueError(message)

    tree = Tree(taxon_set=taxon_set)
    tree.seed_node = next(iter(parentless))
    tree.is_rooted = True
    return tree