Beispiel #1
0
    def testParserBigFile(self):
        """Parse the file at ``self.file`` and check the sites of each model.

        Three models are expected.  For every model the
        ``(column, probability)`` pairs of its ``positive_selection`` entries
        are collected, sorted, and stored under the model's ``model``
        attribute; the entries for models 2, 8 and 3 are then compared with
        the expected values.
        """
        parser = PositiveSelectionParser()
        with open(self.file) as handle:
            models = list(parser.parse(handle))
        assert len(models) == 3

        # Sorting makes the comparisons below independent of the order in
        # which the sites were produced.
        index = {}
        for model in models:
            index[model.model] = sorted(
                (ps.column, ps.probability) for ps in model.positive_selection
            )

        assert len(index[2]) == 2
        assert index[2] == [(188, 0.573),
                            (193, 0.560)]

        assert len(index[8]) == 3
        assert index[8][-1] == (582, 0.932)

        # Parenthesised form: prints the same under Python 2 and is also
        # valid syntax under Python 3.
        print(index[3])
        assert len(index[3]) == 4
        assert index[3] == [(2, 0.925),
                            (3, 0.998),
                            (4, 0.917),
                            (582, 1.0)]
Beispiel #2
0
    def _codeml(self):
        """Import the CodeML positive-selection results into the session.

        Returns immediately when ``self.codemlFile`` is not set.  Otherwise
        every model produced by ``PositiveSelectionParser`` is linked to
        ``self.tree.id`` and added to ``self.session``.  Each site of the
        model is linked to that model, has its column converted to the
        original alignment index through ``CulledColumnMapper``, and is added
        to the session as well.

        Raises:
            AssertionError: if ``self.family.id`` or ``self.tree.id`` is None.
        """
        if not self.codemlFile:
            return
        assert self.family.id is not None
        assert self.tree.id is not None

        # We need to convert the columns to the original alignment indices
        mapper = CulledColumnMapper(self.alignment,
                                    self.alignment.culled_columns)
        parser = PositiveSelectionParser()
        # NOTE(review): self.codemlFile is handed to parse() as-is; confirm
        # the parser accepts whatever it holds (path vs. open handle).
        models = list(parser.parse(self.codemlFile))
        runtime().debug("Found", len(models), "models")
        for i, model in enumerate(models):
            model.tree_key = self.tree.id
            self.session.add(model)
            # Flushed before model.id is read below -- presumably so the id
            # has been assigned by then (confirm against the mapping).
            self.session.flush()
            ps = list(model.ps)
            runtime().debug("Found", len(ps), "sites in model", model.model)
            for j, site in enumerate(ps):
                site.codeml_key = model.id
                # Indices in CodeML start at 1, convert to 0 and then map
                orig = site.column
                site.column = mapper[site.column - 1]
                runtime().debug("column", orig, "mapped to", site.column,
                                site.probability)
                try:
                    self.session.add(site)
                except Exception:
                    # Record which model/site failed, then let the error
                    # propagate unchanged.
                    runtime().debug(i, ":", j, " failure on column", orig,
                                    "mapped to", site.column, site.probability)
                    raise
            runtime().debug("Finished with model")
            self.session.flush()

        runtime().debug("finished import codeml")
Beispiel #3
0
    def _codeml(self):
        """Store the parsed CodeML models and their sites in ``self.session``.

        Nothing happens when ``self.codemlFile`` is not set.  For each model
        returned by ``PositiveSelectionParser`` the method sets ``tree_key``
        to ``self.tree.id``, adds the model to the session and flushes.  For
        each site in ``model.ps`` it then sets ``codeml_key`` to the model's
        id, replaces the column with its original alignment index obtained
        from ``CulledColumnMapper``, and adds the site to the session.

        Raises:
            AssertionError: if ``self.family.id`` or ``self.tree.id`` is None.
        """
        if not self.codemlFile:
            return
        assert self.family.id is not None
        assert self.tree.id is not None

        # We need to convert the columns to the original alignment indices
        mapper = CulledColumnMapper(self.alignment, self.alignment.culled_columns)
        parser = PositiveSelectionParser()
        # NOTE(review): parse() receives self.codemlFile directly; verify the
        # parser supports the type stored there (path vs. open handle).
        models = list(parser.parse(self.codemlFile))
        runtime().debug("Found", len(models), "models")
        for i, model in enumerate(models):
            model.tree_key = self.tree.id
            self.session.add(model)
            # The flush precedes the use of model.id below -- presumably the
            # id is only assigned once the model is flushed (confirm).
            self.session.flush()
            ps = list(model.ps)
            runtime().debug("Found", len(ps), "sites in model", model.model)
            for j, site in enumerate(ps):
                site.codeml_key = model.id
                # Indices in CodeML start at 1, convert to 0 and then map
                orig = site.column
                site.column = mapper[site.column - 1]
                runtime().debug("column", orig, "mapped to", site.column, site.probability)
                try:
                    self.session.add(site)
                except Exception:
                    # Log the failing model/site position before re-raising
                    # the original error.
                    runtime().debug(i, ":", j, " failure on column", orig, "mapped to", site.column, site.probability)
                    raise
            runtime().debug("Finished with model")
            self.session.flush()

        runtime().debug("finished import codeml")
Beispiel #4
0
def main(fam, t, *files):
    """Import the positive-selection results found in each of *files*.

    ``fam`` and ``t`` are converted with ``int`` and stored, together with a
    newly created ``Session``, in the module-level globals ``family``,
    ``tree`` and ``session``.  Each file is parsed with
    ``PositiveSelectionParser`` and every parsed item is passed to ``merge``.
    The session is committed after all files have been processed and is
    closed whether or not an error occurred.

    Args:
        fam: value convertible to ``int``; stored in the global ``family``.
        t: value convertible to ``int``; stored in the global ``tree``.
        *files: paths of the files to parse.
    """
    global family, tree, session
    session = Session()
    try:
        family = int(fam)
        tree = int(t)

        for path in files:
            runtime().debug("Using file", path)
            with open(path) as handle:
                ps = PositiveSelectionParser().parse(handle)
                # Consume the parser output while the handle is still open:
                # if parse() yields lazily, draining it after the ``with``
                # block would read from a closed file.
                count = consume(imap(merge, ps))
            runtime().debug("Found", count, "sites")

        session.commit()
    finally:
        # Release the session even when parsing or the commit fails.
        session.close()
Beispiel #5
0
 def testParserSmallFile(self):
     """Parse the file named ``self.file + "1"`` and check what it yields.

     Exactly one model is expected, carrying four ``positive_selection``
     entries.
     """
     selection_parser = PositiveSelectionParser()
     small_path = self.file + "1"
     with open(small_path) as stream:
         parsed = list(selection_parser.parse(stream))

     model_count = len(parsed)
     assert model_count == 1

     site_count = len(parsed[0].positive_selection)
     assert site_count == 4