Esempio n. 1
0
    def _subtree(self,root):
        """
        Split a tree on a given node, pruning from the original tree.

        A new Tree is created with the same weight, rooted flag, name, data
        class and max_support as self.tree.  The data of ``root`` and of all
        of its descendants is attached to freshly created nodes of the new
        tree (the data objects are shared with the old tree, not copied).
        The new tree is then handed to self.annotater.annotate and ``unlink``
        is called on the original tree at ``root``.

        The descendants are walked with an explicit stack rather than by
        recursion, so very deep trees cannot exceed the interpreter's
        recursion limit.  Nodes are still added depth-first, children in
        their original order, exactly as a recursive walk would add them.

        @param root: the node to use as the new root 
        @return: subtree rooted on this node.
        """
        source = self.tree
        sub = Tree(weight=source.weight,
                   rooted=source.rooted,
                   name=source.name,
                   data=source.dataclass,
                   max_support=source.max_support)

        sub.node(sub.root).data = source.node(root).data

        # Each entry pairs a node id of the old tree with the id, in the new
        # tree, of the parent it has to be attached to.  Children are pushed
        # in reverse so that they are popped (and added) in original order.
        new_root = sub.root
        pending = [(old_succ, new_root)
                   for old_succ in list(source.node(root).succ)[::-1]]
        while pending:
            old_id, new_parent = pending.pop()
            to_add = Node(data=source.node(old_id).data)
            new_id = sub.add(to_add, new_parent)
            pending.extend([(old_succ, new_id)
                            for old_succ in list(source.node(old_id).succ)[::-1]])

        self.annotater.annotate(sub)
        unlink(self.tree,root)
        return sub
Esempio n. 2
0
File: oid.py Progetto: bsmithers/hpf
class Test(unittest.TestCase):
    """
    Checks DiagCharsParser against the module-level fixtures ``tree_str``
    and ``diag_str``.
    """

    def setUp(self):
        """Build the input stream and a tree whose nodes 0-2 have known ids."""
        self.io = StringIO(diag_str)
        self.io.seek(0)
        self.tree = Tree(tree_str)
        # Node ids 10/20/30 are the keys the assertions below look up.
        for node_id, key in enumerate((10, 20, 30)):
            self.tree.node(node_id).get_data().id = key

    def tearDown(self):
        pass

    def testParser(self):
        """Parsed characters are grouped per tree node and ordered by column."""
        parser = DiagCharsParser(self.tree)
        index = defaultdict(list)
        for d in parser.parse(self.io):
            index[d.tree_node_key].append(d)
        # Sort each group once, after grouping.  list.sort is stable, so ties
        # keep their parse order just as the per-append sort did.
        for chars in index.values():
            chars.sort(key=lambda d: d.column)

        self.assertEqual(len(index[10]), 5)
        self.assertEqual(len(index[20]), 8)
        self.assertEqual(len(index[30]), 5)
        self.assertEqual(index[10][0].column, 0)
        self.assertEqual(index[10][4].aa, "IK")
        self.assertEqual(index[20][2].column, 8)
        self.assertEqual(index[20][2].aa, "D")
Esempio n. 3
0
    def _subtree(self, root):
        """
        Build a new tree from the sub-graph found below ``root`` and collapse
        the copied nodes in the original tree.

        self._subgraph(root) supplies the graph ``g`` whose successor
        relation decides which nodes are copied.  A new Tree is created with
        the same weight, rooted flag, name, data class and max_support as
        self.tree; node data objects are shared with the old tree.  After
        the copy, self.collapse is called on self.tree for every node of
        ``g``.

        The graph is walked with an explicit stack rather than by recursion,
        so deep trees cannot exceed the interpreter's recursion limit.  Nodes
        are added depth-first in successor order, as a recursive walk would.

        @param root: the node of self.tree to use as the new root
        @return: the new tree rooted on the data of ``root``
        """
        # Find paths to targets to build a new tree
        g = self._subgraph(root)

        source = self.tree
        sub = Tree(weight=source.weight,
                   rooted=source.rooted,
                   name=source.name,
                   data=source.dataclass,
                   max_support=source.max_support)

        sub.node(sub.root).data = source.node(root).data

        # Each entry pairs a node id of the old tree with the id, in the new
        # tree, of the parent it has to be attached to.  Successors are
        # pushed in reverse so they are popped (and added) in original order.
        new_root = sub.root
        pending = [(old_succ, new_root)
                   for old_succ in list(g.successors_iter(root))[::-1]]
        while pending:
            old_id, new_parent = pending.pop()
            to_add = Node(data=source.node(old_id).data)
            new_id = sub.add(to_add, new_parent)
            pending.extend([(old_succ, new_id)
                            for old_succ in list(g.successors_iter(old_id))[::-1]])

        # Delete nodes from old tree
        for node in g:
            self.collapse(self.tree,node)

        return sub
Esempio n. 4
0
File: oid.py Progetto: bsmithers/hpf
class OIDTreeParser(object):
    """
    Parse the oid.tre PAUP run file for the given Nexus Tree.
    Uses the oid translation table to rename node taxon correctly.
    @deprecated
    """
    
    def __init__(self):
        assert False, "Dummy, use the NEXUS parser" 
    
    TRANSLATION = "Translate"
    NUMBER = "number"
    TAXON = "taxon"
    TREE = "tree"

    name_line = Group(Word(nums).setResultsName(NUMBER)+
                 Word(alphanums+punctuation).setParseAction(lambda tokens:[tokens[0].replace(",","")]).setResultsName(TAXON)
                 )
    
    tree = (Consume(Literal(";")).setResultsName(TREE)
            )
        
    translation = (OneOrMore(name_line).setResultsName(TRANSLATION)+
                   Literal(";")
                   )
    
    expression = (Consume(Literal(TRANSLATION)).suppress()+
                  translation+
                  Consume(Literal("tree PAUP_1 = [&R]")).suppress()+
                  tree+
                  empty
                  )
    
    
    def parse(self,handle):
        ps = OIDTreeParser
        result = OIDTreeParser.expression.parseFile(handle)
        from Bio.Nexus.Trees import Tree
        self.taxon = {}
        runtime().debug("Found",len(result[ps.TRANSLATION]),"species")
        print result
        for translation in result[ps.TRANSLATION]:
            print translation
            num = translation[ps.NUMBER]
            taxon = translation[ps.TAXON]
            runtime().debug(num,taxon)
            self.taxon[num] = taxon
        
        self.tree = Tree(result[ps.TREE])
        assert self.tree.count_terminals() == len(self.taxon.keys(), 
               "Translation count and # of tree terminals don't match")
        for node in self.tree.get_terminals():
            runtime().debug("Rename",node.get_data().taxon,"to",self.taxon[node.get_data().taxon])
            node.get_data().taxon = self.taxon[node.get_data().taxon]
        self.tree.no
        return self
Esempio n. 5
0
def add(file, family_key):
    """
    Store the tree found in ``file`` and all of its nodes in the session.

    The file content is parsed with Bio.Nexus.Trees.Tree.  A database Tree
    row holding the family key, the tree text and the file's base name is
    added and flushed, then the parsed tree is named after the row's id and
    every node produced by TreeNodeFactory is added to the session.

    @param file: path of the tree file to read
    @param family_key: value stored in the row's family_key
    """
    from Bio.Nexus.Trees import Tree as NexusTree

    with open(file) as handle:
        tree_text = handle.read()
    parsed = NexusTree(tree_text)

    record = Tree()
    record.family_key = family_key
    record.text = parsed.to_string(plain=False,plain_newick=True)
    record.filename = os.path.basename(file)
    session.add(record)
    # Flushed before record.id is read below.
    session.flush()

    parsed.name = record.id
    for tree_node in TreeNodeFactory().create(parsed):
        session.add(tree_node)
Esempio n. 6
0
def add(file, family_key):
    """
    Load a tree file into the database session, tree row first, nodes after.

    @param file: path of the tree file; its content is parsed with
        Bio.Nexus.Trees.Tree
    @param family_key: stored in the family_key of the new Tree row
    """
    from Bio.Nexus.Trees import Tree as NexusTree

    with open(file) as source:
        nexus_tree = NexusTree(source.read())

    # Database row describing the tree as a whole.
    db_row = Tree()
    db_row.family_key = family_key
    db_row.text = nexus_tree.to_string(plain=False, plain_newick=True)
    db_row.filename = os.path.basename(file)
    session.add(db_row)
    session.flush()

    # The parsed tree is named after the row id (read after the flush), and
    # that named tree is what TreeNodeFactory receives.
    nexus_tree.name = db_row.id
    node_rows = TreeNodeFactory().create(nexus_tree)
    for node_row in node_rows:
        session.add(node_row)
Esempio n. 7
0
File: oid.py Progetto: bsmithers/hpf
 def setUp(self):
     """Create the input stream and a tree whose nodes 0, 1, 2 get ids 10, 20, 30."""
     stream = StringIO(diag_str)
     stream.seek(0)
     self.io = stream
     self.tree = Tree(tree_str)
     for node_id, key in enumerate((10, 20, 30)):
         self.tree.node(node_id).get_data().id = key
Esempio n. 8
0
def split(tree_file, size, nexus=False, dir=None):
    print file, size
    if nexus:
        tree = Nexus(tree_file).trees[0]
        tree2 = Nexus(tree_file).trees[0]
    else:
        with open(tree_file) as handle:
            tree_str = handle.read()
            tree = Tree(tree_str)
            tree2 = Tree(tree_str)
#    with open(align_file) as handle:
#        alignment = AlignIO.read(handle, "phylip")
    splitter = TreeSplitter(tree,
                            max_size=size,
                            annotater=UnrootedShortestPath)
    subs = list(splitter.subtrees())
    runtime().debug("Found", len(subs), subs)
    dir = dir if dir else os.path.dirname(tree_file)

    for i, tree in enumerate(subs):
        nodes = [tree.node(node) for node in tree.all_ids()]
        taxa = set(
            [node.data.taxon for node in nodes if node.data.taxon != None])
        for terminal in tree2.get_terminals():
            node = tree2.node(terminal)
            if node.data.taxon in taxa:
                node.data.taxon = "%i-" % i + node.data.taxon


#        sub_taxa = tree.get_taxa()
#        sub_alignment = Alignment(alphabet=alignment._alphabet)
#        sub_alignment._records = [r for r in alignment._records if r.id in sub_taxa]
#        assert len(sub_taxa)==len(sub_alignment._records)
##        align_out = "%s.%i" % (os.path.join(dir,os.path.basename(align_file)),i)
#        with open(align_out,"w") as handle:
#            AlignIO.write([sub_alignment], handle, "phylip")
#        from hpf.phylip import interleave
#        interleave(align_out)
        with open(
                "%s.%i" % (os.path.join(dir, os.path.basename(tree_file)), i),
                "w") as handle:
            print >> handle, tree.to_string(plain_newick=True,
                                            branchlengths_only=False) + ";"
    with open("%s.annotated" % os.path.join(dir, os.path.basename(tree_file)),
              "w") as handle:
        print >> handle, tree2.to_string(plain_newick=True,
                                         branchlengths_only=False) + ";"
Esempio n. 9
0
File: oid.py Progetto: bsmithers/hpf
    def _tree(self):
        """
        Import the tree file, its nodes and its diagnostic characters.

        Reads self.treeFile as a Newick string, renames every terminal taxon
        with self._index, stores a hpf.hddb.db.Tree row keyed on
        self.alignment.id, appends the nodes produced by TreeNodeFactory to
        that row, and adds every diagnostic character parsed from
        self.treeDiagCharsFile to the session.

        Sets self.nexus (the parsed tree) and self.tree (the database row).
        """
        session = self.session

        from Bio.Nexus.Trees import Tree as NewickTree
        from hpf.amnh.oid import DiagCharsParser
        from hpf.hddb.db import Tree, TreeFactory, TreeNodeFactory

        # Read through a context manager so the handle is always closed.
        with open(self.treeFile) as handle:
            tree_str = handle.read()
        self.nexus = NewickTree(tree_str)

        # Rename all the taxa.
        for terminal_id in self.nexus.get_terminals():
            node = self.nexus.node(terminal_id)
            node.data.taxon = self._index(node.data.taxon)

        # Create the DB object
        self.tree = Tree(
            alignment_key=self.alignment.id,
            text=self.nexus.to_string(plain=False, plain_newick=True),
            filename=self.treeFile,
        )
        session.add(self.tree)
        session.flush()

        # Now add in the node references
        self.nexus.name = self.tree.id
        assert self.tree.id is not None
        runtime().debug("Added tree", self.tree)

        nodes = list(TreeNodeFactory().create(self.nexus))
        for node in nodes:
            node.ancestor_node = node.ancestor.id if node.ancestor else None
            # This should add the new object into the session
            self.tree.nodes.append(node)
            # NOTE(review): flushed per node, presumably so that an ancestor
            # has its id before a later node reads node.ancestor.id --
            # confirm before batching.
            session.flush()

        runtime().debug("Appended", len(nodes), "tree nodes")
        session.flush()

        # Now import the diagnostic characters and reference the nodes.
        biotree = TreeFactory(name_func=lambda node: str(node.id)).create(self.tree.nodes, self.tree.id)
        parser = DiagCharsParser(biotree)
        runtime().debug(self.treeDiagCharsFile)
        with open(self.treeDiagCharsFile) as handle:
            diagchars = list(parser.parse(handle))
            runtime().debug("DiagChars", len(diagchars))
            for d in diagchars:
                session.add(d)
        session.flush()
Esempio n. 10
0
	def __init__(self, filename):
		"""
			Read the whole file `filename` and parse its content with Tree.
			The path is kept in self.filename, the parsed tree in self.tree.
		"""
		self.filename = filename
		self.tree = None

		# The context manager closes the file even when read() raises.
		with open(self.filename, 'r') as f:
			chunk = f.read()
		self.tree = Tree(chunk)
Esempio n. 11
0
    def _tree(self):
        """
        Import the tree file, its nodes and its diagnostic characters.

        Reads self.treeFile as a Newick string, renames every terminal taxon
        with self._index, stores a hpf.hddb.db.Tree row keyed on
        self.alignment.id, appends the nodes produced by TreeNodeFactory to
        that row, and adds every diagnostic character parsed from
        self.treeDiagCharsFile to the session.

        Sets self.nexus (the parsed tree) and self.tree (the database row).
        """
        session = self.session

        from Bio.Nexus.Trees import Tree as NewickTree
        from hpf.amnh.oid import DiagCharsParser
        from hpf.hddb.db import Tree, TreeFactory, TreeNodeFactory

        # Read through a context manager so the handle is always closed.
        with open(self.treeFile) as handle:
            tree_str = handle.read()
        self.nexus = NewickTree(tree_str)

        # Rename all the taxa.
        for terminal_id in self.nexus.get_terminals():
            node = self.nexus.node(terminal_id)
            node.data.taxon = self._index(node.data.taxon)

        # Create the DB object
        self.tree = Tree(alignment_key=self.alignment.id,
                         text=self.nexus.to_string(plain=False,
                                                   plain_newick=True),
                         filename=self.treeFile)
        session.add(self.tree)
        session.flush()

        # Now add in the node references
        self.nexus.name = self.tree.id
        assert self.tree.id is not None
        runtime().debug("Added tree", self.tree)

        nodes = list(TreeNodeFactory().create(self.nexus))
        for node in nodes:
            node.ancestor_node = node.ancestor.id if node.ancestor else None
            # This should add the new object into the session
            self.tree.nodes.append(node)
            # NOTE(review): flushed per node, presumably so that an ancestor
            # has its id before a later node reads node.ancestor.id --
            # confirm before batching.
            session.flush()

        runtime().debug("Appended", len(nodes), "tree nodes")
        session.flush()

        # Now import the diagnostic characters and reference the nodes.
        biotree = TreeFactory(name_func=lambda node: str(node.id)).create(
            self.tree.nodes, self.tree.id)
        parser = DiagCharsParser(biotree)
        runtime().debug(self.treeDiagCharsFile)
        with open(self.treeDiagCharsFile) as handle:
            diagchars = list(parser.parse(handle))
            runtime().debug("DiagChars", len(diagchars))
            for d in diagchars:
                session.add(d)
        session.flush()
Esempio n. 12
0
class NewickReader:
	"""
		Just a wrapper around Bio.Nexus.Trees to read newick files. 
		In addition, since many of my newick taxon labels are just ncRNA <db_id>s, 
		support database lookup for these IDs as well.
	"""
	def __init__(self, filename):
		"""
			Read the whole file `filename` and parse its content with Tree.
			The path is kept in self.filename, the parsed tree in self.tree.
		"""
		self.filename = filename
		self.tree = None

		# The context manager closes the file even when read() raises.
		with open(self.filename, 'r') as f:
			chunk = f.read()
		self.tree = Tree(chunk)

	def distance(self, taxon1, taxon2):
		"""
			Note that here "taxon" simply means whatever the terminal nodes' data are.
			Since most of my newick files are labeled with <db_id>, it could just be ex: '34969'.

			Both taxa are looked up with the tree's search_taxon and the
			result of the tree's distance() between the two hits is returned.
		"""
		id1 = self.tree.search_taxon(taxon1)
		id2 = self.tree.search_taxon(taxon2)
		return self.tree.distance(id1, id2)
Esempio n. 13
0
File: oid.py Progetto: bsmithers/hpf
 def parse(self,handle):
     ps = OIDTreeParser
     result = OIDTreeParser.expression.parseFile(handle)
     from Bio.Nexus.Trees import Tree
     self.taxon = {}
     runtime().debug("Found",len(result[ps.TRANSLATION]),"species")
     print result
     for translation in result[ps.TRANSLATION]:
         print translation
         num = translation[ps.NUMBER]
         taxon = translation[ps.TAXON]
         runtime().debug(num,taxon)
         self.taxon[num] = taxon
     
     self.tree = Tree(result[ps.TREE])
     assert self.tree.count_terminals() == len(self.taxon.keys(), 
            "Translation count and # of tree terminals don't match")
     for node in self.tree.get_terminals():
         runtime().debug("Rename",node.get_data().taxon,"to",self.taxon[node.get_data().taxon])
         node.get_data().taxon = self.taxon[node.get_data().taxon]
     self.tree.no
     return self
Esempio n. 14
0
File: oid.py Progetto: bsmithers/hpf
class OIDImporter(object):
    """
    Import a set of OID files into the database
    """

    def __init__(
        self,
        familyName,
        alignFile,
        alignColcullLog,
        alignSeqcullLog,
        treeFile,
        treeDiagCharsFile,
        codemlFile=None,
        alignFormat="fasta",
        oid_key=None,
    ):
        self.familyName = familyName
        self.treeFile = treeFile
        self.treeDiagCharsFile = treeDiagCharsFile
        self.alignFile = alignFile
        self.alignColcullLog = alignColcullLog
        self.alignSeqcullLog = alignSeqcullLog
        self.codemlFile = codemlFile
        self.alignFormat = alignFormat
        self.oid_key = oid_key

    def merge(self):
        """
        Import the family into the database, or reuse it when it exists.

        Opens a new Session (kept in self.session) and looks the family up
        by self.familyName.  When it is missing, the family, alignment and
        tree are imported; otherwise the stored ones are reused.  CodeML
        results are imported when the family's first alignment's tree has
        none.  The session is committed on success and is closed whether or
        not a step raised, so a failed import no longer leaves it open.
        """
        from hpf.hddb.db import Session, Family

        self.session = Session()
        try:
            self.family = self.session.query(Family).filter(Family.name == self.familyName).first()
            if not self.family:
                runtime().debug("Creating family", self.familyName)
                self._family()
                self._alignment()
                self._tree()
            else:
                # NOTE(review): this branch reads family.alignment while the
                # check below reads family.alignments[0]; confirm that both
                # attributes exist on Family.
                self.alignment = self.family.alignment
                self.tree = self.alignment.tree
                runtime().debug("Found family", self.family.id)

            if not self.family.alignments[0].tree.codeml:
                runtime().debug("Importing codeml")
                self._codeml()
            else:
                runtime().debug("Already found codeml", self.family.alignments[0].tree.codeml.id)

            # Commit the session
            self.session.commit()
        finally:
            # Close and finish, even when one of the steps above failed.
            self.session.close()

    def _index(self, name):
        n = name.split("#")[-1]
        if n.startswith("N"):
            n = n[1:]
        assert n.isdigit()
        return n

    def _tree(self):
        """
        Import the tree file, its nodes and its diagnostic characters.

        Reads self.treeFile as a Newick string, renames every terminal taxon
        with self._index, stores a hpf.hddb.db.Tree row keyed on
        self.alignment.id, appends the nodes produced by TreeNodeFactory to
        that row, and adds every diagnostic character parsed from
        self.treeDiagCharsFile to the session.

        Sets self.nexus (the parsed tree) and self.tree (the database row).
        """
        session = self.session

        from Bio.Nexus.Trees import Tree as NewickTree
        from hpf.amnh.oid import DiagCharsParser
        from hpf.hddb.db import Tree, TreeFactory, TreeNodeFactory

        # Read through a context manager so the handle is always closed.
        with open(self.treeFile) as handle:
            tree_str = handle.read()
        self.nexus = NewickTree(tree_str)

        # Rename all the taxa.
        for terminal_id in self.nexus.get_terminals():
            node = self.nexus.node(terminal_id)
            node.data.taxon = self._index(node.data.taxon)

        # Create the DB object
        self.tree = Tree(
            alignment_key=self.alignment.id,
            text=self.nexus.to_string(plain=False, plain_newick=True),
            filename=self.treeFile,
        )
        session.add(self.tree)
        session.flush()

        # Now add in the node references
        self.nexus.name = self.tree.id
        assert self.tree.id is not None
        runtime().debug("Added tree", self.tree)

        nodes = list(TreeNodeFactory().create(self.nexus))
        for node in nodes:
            node.ancestor_node = node.ancestor.id if node.ancestor else None
            # This should add the new object into the session
            self.tree.nodes.append(node)
            # NOTE(review): flushed per node, presumably so that an ancestor
            # has its id before a later node reads node.ancestor.id --
            # confirm before batching.
            session.flush()

        runtime().debug("Appended", len(nodes), "tree nodes")
        session.flush()

        # Now import the diagnostic characters and reference the nodes.
        biotree = TreeFactory(name_func=lambda node: str(node.id)).create(self.tree.nodes, self.tree.id)
        parser = DiagCharsParser(biotree)
        runtime().debug(self.treeDiagCharsFile)
        with open(self.treeDiagCharsFile) as handle:
            diagchars = list(parser.parse(handle))
            runtime().debug("DiagChars", len(diagchars))
            for d in diagchars:
                session.add(d)
        session.flush()

    def _codeml(self):
        """
        Import the models and sites parsed from self.codemlFile.

        Returns immediately when no codemlFile was given.  Each model from
        PositiveSelectionParser is tied to self.tree.id and added to the
        session; each of its sites is tied to the model id, has its column
        translated through a CulledColumnMapper, and is added as well.
        """
        if not self.codemlFile:
            return
        assert self.family.id != None
        assert self.tree.id != None

        # We need to convert the columns to the original alignment indices
        column_map = CulledColumnMapper(self.alignment, self.alignment.culled_columns)
        selection_models = list(PositiveSelectionParser().parse(self.codemlFile))
        runtime().debug("Found", len(selection_models), "models")

        db = self.session
        for model_no, model in enumerate(selection_models):
            model.tree_key = self.tree.id
            db.add(model)
            # Flushed before model.id is copied onto the sites below.
            db.flush()

            sites = list(model.ps)
            runtime().debug("Found", len(sites), "sites in model", model.model)
            for site_no, site in enumerate(sites):
                site.codeml_key = model.id
                # Indices in CodeML start at 1, convert to 0 and then map
                codeml_column = site.column
                site.column = column_map[codeml_column - 1]
                runtime().debug("column", codeml_column, "mapped to", site.column, site.probability)
                try:
                    db.add(site)
                except:
                    # Log which model/site failed, then let the error through.
                    runtime().debug(model_no, ":", site_no, " failure on column", codeml_column, "mapped to", site.column, site.probability)
                    raise
            runtime().debug("Finished with model")
            db.flush()

        # NOTE: an older import path (CodeML row plus LRTParser selections)
        # used to be kept here as commented-out code; it is not executed.
        runtime().debug("finished import codeml")

    def _alignment(self):
        """
        Import the alignment file and its two culling logs.

        Reads self.alignFile in self.alignFormat, rewrites every record id
        with self._index, stores an Alignment row (kept in self.alignment),
        and for each record adds an AlignmentProtein and merges a
        FamilyProtein.  The column-culling log (four whitespace-separated
        fields per line) and the sequence-culling log (two fields, e.g.
        "rice#1182215    0.712765957446808") are then merged into the
        session.  Blank log lines are skipped.

        @raise AssertionError: when a sequence-culling id is not numeric
        """
        session = self.session

        from Bio import AlignIO
        from hpf.hddb.db import Alignment, AlignmentProtein, FamilyProtein
        from hpf.hddb.db import AlignmentColcull, AlignmentSeqcull

        # Read the alignment
        with open(self.alignFile) as handle:
            align = AlignIO.read(handle, self.alignFormat)
        # Rename 'id' with the correct protein key
        for record in align:
            record.id = self._index(record.id)
        # Write to a text buffer and create the DB object
        text = StringIO()
        AlignIO.write([align], text, self.alignFormat)

        self.alignment = Alignment(
            family_key=self.family.id, format=self.alignFormat, filename=self.alignFile, text=text.getvalue()
        )
        # Add to session and flush
        session.add(self.alignment)
        session.flush()

        # Flip through the proteins in the alignment and add
        # the records.
        for record in align:
            protein_key = record.id
            assert protein_key != 0 and protein_key is not None, protein_key
            runtime().debug("protein: ", protein_key)

            s = AlignmentProtein(alignment_key=self.alignment.id, protein_key=protein_key, sequence=str(record.seq))
            session.add(s)
            session.flush()

            # There may exist multiple alignments, but the definition
            # of membership in the family is done here.
            fs = FamilyProtein(family_key=self.family.id, protein_key=protein_key, seed=True)
            session.merge(fs)

        # Now read the column culling log.  Indices start at 0 here.
        with open(self.alignColcullLog) as handle:
            for line in handle:
                fields = line.split()
                if not fields:
                    continue
                column, gap, taxa, ratio = fields
                col = AlignmentColcull(alignment_key=self.alignment.id, column=column, gap_percentage=ratio)
                session.merge(col)
        with open(self.alignSeqcullLog) as handle:
            for line in handle:
                parts = line.split()
                if not parts:
                    continue
                seq, score = parts
                seq = self._index(seq)
                if not seq.isdigit():
                    # The original used `assert false`, which raised a
                    # NameError instead of the intended assertion failure.
                    raise AssertionError("%r SEQ: %r" % (parts, seq))
                cul = AlignmentSeqcull(alignment_key=self.alignment.id, protein_key=seq, score=score)
                # The original built this row but never handed it to the
                # session; merge it the same way as the column-culling rows.
                session.merge(cul)
        session.flush()

    def _family(self):
        """
        Create the Family row named self.familyName (experiment_key 0),
        keep it in self.family, add it to the session and flush.
        """
        from hpf.hddb.db import Family

        db = self.session
        self.family = Family(name=self.familyName, experiment_key=0)
        db.add(self.family)
        db.flush()
        trefname = dirbase + bpg[0:6] + '/' + bpg[0:9] + '/user/' + bpg[
            0:9] + '.nj'
        handle = open(trefname, 'r')

        treestr = ''

        for line in handle:
            treestr += line.strip()

        handle.close()

        for oldid in oldid_newid:
            treestr = treestr.replace('bpgseq%d' % oldid, oldid_newid[oldid],
                                      1)

        mytreeobj = Tree(tree=treestr, rooted=True)

        ### prune taxa we don't want ###

        alltaxa = mytreeobj.get_taxa()
        badtaxa = []
        slowest_inparalogs = {}

        for taxon in alltaxa:
            if taxon not in oldid_newid.values():
                badtaxa.append(taxon)

            else:
                sp = taxon.split('_bpgseq')[0]

                if sp in slowest_inparalogs:
Esempio n. 16
0
class OIDImporter(object):
    """
    Import a set of OID files into the database
    """
    def __init__(self,
                 familyName,
                 alignFile,
                 alignColcullLog,
                 alignSeqcullLog,
                 treeFile,
                 treeDiagCharsFile,
                 codemlFile=None,
                 alignFormat="fasta",
                 oid_key=None):
        self.familyName = familyName
        self.treeFile = treeFile
        self.treeDiagCharsFile = treeDiagCharsFile
        self.alignFile = alignFile
        self.alignColcullLog = alignColcullLog
        self.alignSeqcullLog = alignSeqcullLog
        self.codemlFile = codemlFile
        self.alignFormat = alignFormat
        self.oid_key = oid_key

    def merge(self):
        """
        Import the family into the database, or reuse it when it exists.

        Opens a new Session (kept in self.session) and looks the family up
        by self.familyName.  When it is missing, the family, alignment and
        tree are imported; otherwise the stored ones are reused.  CodeML
        results are imported when the family's first alignment's tree has
        none.  The session is committed on success and is closed whether or
        not a step raised, so a failed import no longer leaves it open.
        """
        from hpf.hddb.db import Session, Family
        self.session = Session()
        try:
            self.family = self.session.query(Family).filter(
                Family.name == self.familyName).first()
            if not self.family:
                runtime().debug("Creating family", self.familyName)
                self._family()
                self._alignment()
                self._tree()
            else:
                # NOTE(review): this branch reads family.alignment while the
                # check below reads family.alignments[0]; confirm that both
                # attributes exist on Family.
                self.alignment = self.family.alignment
                self.tree = self.alignment.tree
                runtime().debug("Found family", self.family.id)

            if not self.family.alignments[0].tree.codeml:
                runtime().debug("Importing codeml")
                self._codeml()
            else:
                runtime().debug("Already found codeml",
                                self.family.alignments[0].tree.codeml.id)

            # Commit the session
            self.session.commit()
        finally:
            # Close and finish, even when one of the steps above failed.
            self.session.close()

    def _index(self, name):
        n = name.split("#")[-1]
        if n.startswith("N"):
            n = n[1:]
        assert n.isdigit()
        return n

    def _tree(self):
        """
        Import the tree file, its nodes and its diagnostic characters.

        Reads self.treeFile as a Newick string, renames every terminal taxon
        with self._index, stores a hpf.hddb.db.Tree row keyed on
        self.alignment.id, appends the nodes produced by TreeNodeFactory to
        that row, and adds every diagnostic character parsed from
        self.treeDiagCharsFile to the session.

        Sets self.nexus (the parsed tree) and self.tree (the database row).
        """
        session = self.session

        from Bio.Nexus.Trees import Tree as NewickTree
        from hpf.amnh.oid import DiagCharsParser
        from hpf.hddb.db import Tree, TreeFactory, TreeNodeFactory

        # Read through a context manager so the handle is always closed.
        with open(self.treeFile) as handle:
            tree_str = handle.read()
        self.nexus = NewickTree(tree_str)

        # Rename all the taxa.
        for terminal_id in self.nexus.get_terminals():
            node = self.nexus.node(terminal_id)
            node.data.taxon = self._index(node.data.taxon)

        # Create the DB object
        self.tree = Tree(alignment_key=self.alignment.id,
                         text=self.nexus.to_string(plain=False,
                                                   plain_newick=True),
                         filename=self.treeFile)
        session.add(self.tree)
        session.flush()

        # Now add in the node references
        self.nexus.name = self.tree.id
        assert self.tree.id is not None
        runtime().debug("Added tree", self.tree)

        nodes = list(TreeNodeFactory().create(self.nexus))
        for node in nodes:
            node.ancestor_node = node.ancestor.id if node.ancestor else None
            # This should add the new object into the session
            self.tree.nodes.append(node)
            # NOTE(review): flushed per node, presumably so that an ancestor
            # has its id before a later node reads node.ancestor.id --
            # confirm before batching.
            session.flush()

        runtime().debug("Appended", len(nodes), "tree nodes")
        session.flush()

        # Now import the diagnostic characters and reference the nodes.
        biotree = TreeFactory(name_func=lambda node: str(node.id)).create(
            self.tree.nodes, self.tree.id)
        parser = DiagCharsParser(biotree)
        runtime().debug(self.treeDiagCharsFile)
        with open(self.treeDiagCharsFile) as handle:
            diagchars = list(parser.parse(handle))
            runtime().debug("DiagChars", len(diagchars))
            for d in diagchars:
                session.add(d)
        session.flush()

    def _codeml(self):
        """
        Import the models and sites parsed from self.codemlFile.

        Returns immediately when no codemlFile was given.  Each model from
        PositiveSelectionParser is tied to self.tree.id and added to the
        session; each of its sites is tied to the model id, has its column
        translated through a CulledColumnMapper, and is added as well.
        """
        if not self.codemlFile:
            return
        assert self.family.id != None
        assert self.tree.id != None

        # We need to convert the columns to the original alignment indices
        column_map = CulledColumnMapper(self.alignment,
                                        self.alignment.culled_columns)
        selection_models = list(
            PositiveSelectionParser().parse(self.codemlFile))
        runtime().debug("Found", len(selection_models), "models")

        db = self.session
        for model_no, model in enumerate(selection_models):
            model.tree_key = self.tree.id
            db.add(model)
            # Flushed before model.id is copied onto the sites below.
            db.flush()

            sites = list(model.ps)
            runtime().debug("Found", len(sites), "sites in model", model.model)
            for site_no, site in enumerate(sites):
                site.codeml_key = model.id
                # Indices in CodeML start at 1, convert to 0 and then map
                codeml_column = site.column
                site.column = column_map[codeml_column - 1]
                runtime().debug("column", codeml_column, "mapped to",
                                site.column, site.probability)
                try:
                    db.add(site)
                except:
                    # Log which model/site failed, then let the error through.
                    runtime().debug(model_no, ":", site_no,
                                    " failure on column", codeml_column,
                                    "mapped to", site.column, site.probability)
                    raise
            runtime().debug("Finished with model")
            db.flush()

        # NOTE: an older import path (CodeML row plus LRTParser selections)
        # used to be kept here as commented-out code; it is not executed.
        runtime().debug("finished import codeml")

    def _alignment(self):
        """
        Import the alignment file and its two culling logs.

        Reads self.alignFile in self.alignFormat, rewrites every record id
        with self._index, stores an Alignment row (kept in self.alignment),
        and for each record adds an AlignmentProtein and merges a
        FamilyProtein.  The column-culling log (four whitespace-separated
        fields per line) and the sequence-culling log (two fields, e.g.
        "rice#1182215    0.712765957446808") are then merged into the
        session.  Blank log lines are skipped.

        @raise AssertionError: when a sequence-culling id is not numeric
        """
        session = self.session

        from Bio import AlignIO
        from hpf.hddb.db import Alignment, AlignmentProtein, FamilyProtein
        from hpf.hddb.db import AlignmentColcull, AlignmentSeqcull

        # Read the alignment
        with open(self.alignFile) as handle:
            align = AlignIO.read(handle, self.alignFormat)
        # Rename 'id' with the correct protein key
        for record in align:
            record.id = self._index(record.id)
        # Write to a text buffer and create the DB object
        text = StringIO()
        AlignIO.write([align], text, self.alignFormat)
        self.alignment = Alignment(family_key=self.family.id,
                                   format=self.alignFormat,
                                   filename=self.alignFile,
                                   text=text.getvalue())
        # Add to session and flush
        session.add(self.alignment)
        session.flush()

        # Flip through the proteins in the alignment and add
        # the records.
        for record in align:
            protein_key = record.id
            assert protein_key != 0 and protein_key is not None, protein_key
            runtime().debug("protein: ", protein_key)
            s = AlignmentProtein(alignment_key=self.alignment.id,
                                 protein_key=protein_key,
                                 sequence=str(record.seq))
            session.add(s)
            session.flush()

            # There may exist multiple alignments, but the definition
            # of membership in the family is done here.
            fs = FamilyProtein(family_key=self.family.id,
                               protein_key=protein_key,
                               seed=True)
            session.merge(fs)

        # Now read the column culling log.  Indices start at 0 here.
        with open(self.alignColcullLog) as handle:
            for line in handle:
                fields = line.split()
                if not fields:
                    continue
                column, gap, taxa, ratio = fields
                col = AlignmentColcull(alignment_key=self.alignment.id,
                                       column=column,
                                       gap_percentage=ratio)
                session.merge(col)
        with open(self.alignSeqcullLog) as handle:
            for line in handle:
                parts = line.split()
                if not parts:
                    continue
                seq, score = parts
                seq = self._index(seq)
                if not seq.isdigit():
                    # The original used `assert false`, which raised a
                    # NameError instead of the intended assertion failure.
                    raise AssertionError("%r SEQ: %r" % (parts, seq))
                cul = AlignmentSeqcull(alignment_key=self.alignment.id,
                                       protein_key=seq,
                                       score=score)
                # The original built this row but never handed it to the
                # session; merge it the same way as the column-culling rows.
                session.merge(cul)
        session.flush()

    def _family(self):
        """
        Create the Family row named self.familyName (experiment_key 0),
        keep it in self.family, add it to the session and flush.
        """
        from hpf.hddb.db import Family

        db = self.session
        self.family = Family(name=self.familyName, experiment_key=0)
        db.add(self.family)
        db.flush()