Exemple #1
0
		data_outs.addStream(sys.stdout)

	# Write out parameters
	data_outs.write("# Run started {}\n".format(util.timestamp()))
	data_outs.write("# Command: {}\n".format(' '.join(sys.argv)))
	data_outs.write("# Parameters:\n")
	optdict = vars(options)
	for (k,v) in optdict.items():
		data_outs.write("#\t{k}: {v}\n".format(k=k, v=v))

	# Read input
	fname =os.path.expanduser(options.in_fname)
	if not os.path.isfile(fname):
	 	raise IOError("# Error: file {} does not exist".format(fname))

	tree_root = Newick.Clade()
	tree_root.parent = None
	tree_root.name = "cellular organisms"


	# Get directory of guide file
	path = os.path.dirname(fname)
	curwd = os.getcwd()

	species_names = []
	with open(fname,'r') as inf:
		os.chdir(path)
		tab = util.readTable(inf, header=True)
		rows = tab.dictrows
		if options.debug:
			rows = [x for x in tab.dictrows][:2]
Exemple #2
0
    def taxid2tree(self, taxid_list, out_fmt="newick"):
        """Build a tree from the lineage paths of a list of taxids.

        Every taxid is expanded with ``self.get_path`` into a path whose
        first element is treated as the root and whose last element is the
        leaf. All paths are merged into one tree of ``Newick.Clade`` nodes,
        which is then serialised with ``bp.write``.

        Args:
            taxid_list: iterable of taxids accepted by ``self.get_path``.
            out_fmt: format name passed to ``bp.write`` (newick / phyloxml ...).

        Returns:
            The serialised tree as a string.

        Raises:
            ValueError: if no root node can be found, e.g. because
                ``taxid_list`` is empty.
            AssertionError: if a path starts at a different root than the
                first path.
        """
        # One ";"-joined path string per taxid.
        path_list = [
            ";".join([str(item) for item in self.get_path(taxid)])
            for taxid in taxid_list
        ]

        # node name -> Clade; while the tree is being assembled each clade
        # carries a temporary ``parent`` attribute holding its parent's name.
        nodes = {}
        root = None

        for i, path in enumerate(path_list):
            line = path.strip().split(";")
            if root is None:
                root = line[0]
            else:
                assert root == line[
                    0], "The %d-th line is from a different root" % (i + 1)

            # Walk from leaf to root so every node is seen next to its parent.
            leaf2root = line[::-1]
            last = len(leaf2root) - 1

            for j, child_node in enumerate(leaf2root):
                if child_node in nodes:
                    continue
                # The root is marked by being its own parent.
                parent_node = child_node if j == last else leaf2root[j + 1]
                clade = Newick.Clade(name=child_node)
                clade.parent = parent_node
                nodes[child_node] = clade

        # Link every clade under its parent and drop the temporary attribute.
        root_node = None
        for node_name, node_clade in nodes.items():
            if node_name == node_clade.parent:
                root_node = node_clade
                print("root node is %s, constructing tree ..." % (
                    str(node_name)))
            else:
                nodes[node_clade.parent].clades.append(node_clade)
            del node_clade.parent

        if root_node is None:
            # Without this guard an empty taxid_list would fail later with an
            # obscure UnboundLocalError.
            raise ValueError(
                "no root node found: taxid_list is empty or its paths "
                "do not contain a root")

        tree = Newick.Tree(root=root_node)

        treeFile = StringIO()
        bp.write(tree, treeFile, out_fmt)

        return treeFile.getvalue()
Exemple #3
0
 def new_clade(self, parent=None):
     """Create an empty Newick.Clade, optionally tagged with its parent.

     When ``parent`` is truthy it is stored on the new clade as a
     (temporary) ``parent`` attribute; otherwise the attribute is not set.
     """
     fresh = Newick.Clade()
     if not parent:
         return fresh
     fresh.parent = parent
     return fresh
Exemple #4
0
    def path2newick(self, path2pathFile, node_fmt="taxid", out_fmt="newick"):
        """Convert a file of taxonomic paths into a tree file.

        Each non-blank line of the input is a ";"-separated path written
        from root to leaf. Paths should consist of taxonomic ids, not
        scientific names, because some scientific names are shared between
        ranks whereas ids are unique.

        The tree is written next to the input file as
        ``<basename>2tree_<node_fmt>.<out_fmt>``.

        Args:
            path2pathFile: path of the input path file.
            node_fmt: "taxid" keeps the ids as node names; "sciName"
                replaces every node name with ``self.get_sciName(name)``.
            out_fmt: format name passed to ``bp.write`` (newick / phyloxml ...).

        Raises:
            ValueError: if no root node can be found, e.g. the file holds
                no paths.
            AssertionError: if a line's second field differs from that of
                the first line, or ``node_fmt`` is not "taxid" / "sciName".
            IndexError: if a non-blank line has fewer than two fields.
        """
        path, fileName = os.path.split(path2pathFile)
        basename = os.path.splitext(fileName)[0]
        outFile = os.path.join(path,
                               basename + "2tree_" + node_fmt + "." + out_fmt)

        # node name -> Clade; while the tree is being assembled each clade
        # carries a temporary ``parent`` attribute holding its parent's name.
        nodes = {}
        root = None

        with open(path2pathFile, "r") as pathFile:
            for i, raw_line in enumerate(pathFile):
                # Drop surrounding whitespace and any trailing ";".
                cleaned = raw_line.strip().rstrip(";").strip()
                if not cleaned:
                    # A blank line carries no path; skip it instead of
                    # failing with IndexError below.
                    continue
                line = cleaned.split(";")

                # NOTE(review): the consistency check uses the SECOND field,
                # while the tree root built below is the FIRST field --
                # confirm this matches the path file format.
                if root is None:
                    root = line[1]
                else:
                    assert root == line[
                        1], "The %d-th line is from a different root" % (i + 1)

                # Walk from leaf to root so every node is seen next to its
                # parent.
                leaf2root = line[::-1]
                last = len(leaf2root) - 1

                for j, child_node in enumerate(leaf2root):
                    if child_node in nodes:
                        continue
                    # The root is marked by being its own parent.
                    parent_node = child_node if j == last else leaf2root[j + 1]
                    clade = Newick.Clade(name=child_node)
                    clade.parent = parent_node
                    nodes[child_node] = clade

        # Link every clade under its parent and drop the temporary attribute.
        root_node = None
        for node_name, node_clade in nodes.items():
            if node_name == node_clade.parent:
                root_node = node_clade
                print(node_clade)
                print("root node found!! ")
            else:
                nodes[node_clade.parent].clades.append(node_clade)
            del node_clade.parent

        if root_node is None:
            # Without this guard an empty file would fail later with an
            # obscure UnboundLocalError.
            raise ValueError(
                "no root node found in %s: the file contains no usable paths"
                % path2pathFile)

        if node_fmt != "taxid":
            assert node_fmt == "sciName", "The node_fmt should be taxid or sciName"
            # Rename each clade exactly once, directly, rather than through
            # its parent's child list.
            for node in nodes.values():
                node.name = self.get_sciName(node.name)

        tree = Newick.Tree(root=root_node)

        # write tree to file
        print('Writing %s tree to %s...' % (out_fmt, outFile))

        bp.write(tree, outFile, out_fmt)