def testParserBigFile(self):
    """Parse the full fixture file and verify the sites found per model.

    The fixture at ``self.file`` is expected to yield exactly three models.
    For each model the ``(column, probability)`` pairs of its
    ``positive_selection`` entries are collected, sorted, and compared
    against known values for the models keyed 2, 8 and 3.
    """
    parser = PositiveSelectionParser()
    with open(self.file) as handle:
        # Materialize inside the ``with`` in case parse() reads lazily.
        models = list(parser.parse(handle))
    assert len(models) == 3

    # Key the sorted site tuples by ``model.model`` so that the checks
    # below do not depend on the order in which sites were parsed.
    index = {}
    for model in models:
        index[model.model] = sorted(
            (site.column, site.probability)
            for site in model.positive_selection
        )

    assert len(index[2]) == 2
    assert index[2] == [(188, 0.573), (193, 0.560)]

    assert len(index[8]) == 3
    assert index[8][-1] == (582, 0.932)

    assert len(index[3]) == 4
    assert index[3] == [(2, 0.925), (3, 0.998), (4, 0.917), (582, 1.0)]
def _codeml(self):
    """Import the CodeML positive-selection results into the session.

    Does nothing when ``self.codemlFile`` is not set. Otherwise every model
    produced by ``PositiveSelectionParser`` is linked to ``self.tree`` and
    added to ``self.session``; each of the model's sites is linked to the
    model, has its column translated back to the original (un-culled)
    alignment index, and is added to the session as well.

    Raises:
        AssertionError: if the family or the tree has no id yet.
        Exception: whatever ``self.session.add(site)`` raises is logged
            through ``runtime().debug`` and re-raised unchanged.
    """
    if not self.codemlFile:
        return
    assert self.family.id is not None
    assert self.tree.id is not None

    # We need to convert the columns to the original alignment indices
    mapper = CulledColumnMapper(self.alignment, self.alignment.culled_columns)
    parser = PositiveSelectionParser()
    # NOTE(review): parse() is handed self.codemlFile directly here, while
    # other visible callers pass an open file handle -- confirm that
    # parse() accepts a path as well.
    models = list(parser.parse(self.codemlFile))
    runtime().debug("Found", len(models), "models")

    for i, model in enumerate(models):
        model.tree_key = self.tree.id
        self.session.add(model)
        # Flush before touching the sites: model.id is read below.
        self.session.flush()

        sites = list(model.ps)
        runtime().debug("Found", len(sites), "sites in model", model.model)
        for j, site in enumerate(sites):
            site.codeml_key = model.id
            # Indices in CodeML start at 1, convert to 0 and then map
            orig = site.column
            site.column = mapper[orig - 1]
            runtime().debug("column", orig, "mapped to", site.column, site.probability)
            try:
                self.session.add(site)
            except Exception:
                # Record which model/site failed, then let the error propagate.
                runtime().debug(i, ":", j, " failure on column", orig, "mapped to", site.column, site.probability)
                raise
        runtime().debug("Finished with model")
        self.session.flush()

    # NOTE: an older LRTParser-based import (which stored the raw CodeML
    # text as a CodeML record) used to be kept here as commented-out code.
    runtime().debug("finished import codeml")
def main(fam, t, *files):
    """Parse positive-selection results from ``files`` and merge them.

    Sets the module-level ``family``, ``tree`` and ``session`` globals
    (presumably read by ``merge`` -- confirm against its definition), then
    feeds every item parsed from each file through ``merge``. All files are
    committed together once every file has been processed.

    Args:
        fam: family identifier; converted with ``int()``.
        t: tree identifier; converted with ``int()``.
        *files: paths of the files to parse.

    Raises:
        ValueError: if ``fam`` or ``t`` cannot be converted to ``int``.
    """
    global family, tree, session
    session = Session()
    try:
        family = int(fam)
        tree = int(t)
        for path in files:
            runtime().debug("Using file", path)
            with open(path) as handle:
                sites = PositiveSelectionParser().parse(handle)
                # imap is lazy, so the merge has to be driven to completion
                # while the file handle is still open.
                count = consume(imap(merge, sites))
            runtime().debug("Found", count, "sites")
        session.commit()
    finally:
        # Always release the session, even when parsing, merging or the
        # commit fails part-way through.
        session.close()
def testParserSmallFile(self):
    """Parse the small fixture (``self.file + "1"``) and check its contents.

    The file is expected to produce exactly one model, and that model is
    expected to carry four ``positive_selection`` entries.
    """
    selection_parser = PositiveSelectionParser()
    small_fixture = self.file + "1"
    with open(small_fixture) as fixture_handle:
        parsed_models = list(selection_parser.parse(fixture_handle))

    assert len(parsed_models) == 1
    only_model = parsed_models[0]
    assert len(only_model.positive_selection) == 4