def __init__(self, dbhandle, alignator, table_scop_test,
             min_profile_size=20, min_level=30, max_level=90,
             neighbours="pairsdb_90x90"):
    """Initialise the tester with profile thresholds and a neighbours table.

    dbhandle, alignator and table_scop_test are passed through unchanged to
    ScopTester.__init__.  min_profile_size, min_level and max_level are
    stored as mMinProfileSize, mMinLevel and mMaxLevel.  neighbours is the
    name given to the TablePairsdbNeighbours table object.
    """
    # These attributes are assigned before the base constructor is called,
    # so they already exist while ScopTester.__init__ runs.
    self.mMinProfileSize = min_profile_size
    self.mMinLevel = min_level
    self.mMaxLevel = max_level
    self.mTableNameNeighbours = neighbours

    ScopTester.__init__(self, dbhandle, alignator, table_scop_test)

    # self.dbhandle is not assigned in this method; presumably the base
    # constructor provides it -- verify against ScopTester.
    neighbour_table = TablePairsdbNeighbours(self.dbhandle)
    self.mTableNeighbours = neighbour_table
    neighbour_table.SetName(self.mTableNameNeighbours)

    # lambda; also used as the scale factor of the Dirichlet log-oddor.
    self.mBlastL = 0.3
    self.mLogOddorScaleFactor = self.mBlastL
    self.mLogOddor = alignlib.makeLogOddorDirichlet(self.mLogOddorScaleFactor)

    self.mMaxLinesMali = 1000
    self.mRegularizor = alignlib.makeRegularizorDirichletPrecomputed()
def startUp(self):
    """Open the profile library for this slice and configure alignlib.

    Does nothing when self.isComplete() is true.  In append mode the library
    is opened with mode "a" and the last inserted key is stored in
    self.mContinueAt; otherwise it is opened with mode "w".
    """
    if self.isComplete():
        return

    if self.mAppend:
        library = ProfileLibrary.ProfileLibrary(
            self.mFilenameProfile + self.getSlice(), "a")
        self.mProfileLibrary = library
        self.mContinueAt = library.getLastInsertedKey()
        self.info("processing will continue after %s" % (str(self.mContinueAt)))
    else:
        self.mProfileLibrary = ProfileLibrary.ProfileLibrary(
            self.mFilenameProfile + self.getSlice(), "w", force=self.mForce)

    # set default values
    # NOTE(review): applied in both append and write mode -- confirm against
    # the original indentation.
    log_oddor = alignlib.makeLogOddorDirichlet(self.mScaleFactor)
    self.mProfileLibrary.setLogOddor(log_oddor)
    regularizor = alignlib.makeRegularizorDirichletPrecomputed()
    self.mProfileLibrary.setRegularizor(regularizor)
    weightor = alignlib.makeWeightor()
    self.mProfileLibrary.setWeightor(weightor)

    protein_encoder = alignlib.getEncoder(alignlib.Protein20)
    alignlib.setDefaultEncoder(protein_encoder)
def startUp(self):
    """Prepare self.mProfileLibrary and the alignlib default encoder.

    Returns immediately if self.isComplete() reports true.  Without
    self.mAppend a library is opened in "w" mode (passing force=self.mForce);
    with it, the library is opened in "a" mode and self.mContinueAt is set to
    the library's last inserted key.
    """
    if self.isComplete():
        return

    if not self.mAppend:
        self.mProfileLibrary = ProfileLibrary.ProfileLibrary(
            self.mFilenameProfile + self.getSlice(),
            "w",
            force=self.mForce,
        )
    else:
        self.mProfileLibrary = ProfileLibrary.ProfileLibrary(
            self.mFilenameProfile + self.getSlice(),
            "a",
        )
        self.mContinueAt = self.mProfileLibrary.getLastInsertedKey()
        resume_key = str(self.mContinueAt)
        self.info("processing will continue after %s" % (resume_key))

    # set default values
    # NOTE(review): assumed to run for both branches above -- confirm against
    # the original indentation.
    profiles = self.mProfileLibrary
    profiles.setLogOddor(alignlib.makeLogOddorDirichlet(self.mScaleFactor))
    profiles.setRegularizor(alignlib.makeRegularizorDirichletPrecomputed())
    profiles.setWeightor(alignlib.makeWeightor())

    alignlib.setDefaultEncoder(alignlib.getEncoder(alignlib.Protein20))
def __init__(
    self,
    dbhandle,
    alignator,
    table_scop_test,
    min_profile_size=20,
    min_level=30,
    max_level=90,
    neighbours="pairsdb_90x90",
):
    """Store the profile parameters, then set up the base class and helpers.

    The three size/level arguments are kept as mMinProfileSize, mMinLevel
    and mMaxLevel; neighbours becomes the name of the neighbours table.
    dbhandle, alignator and table_scop_test are forwarded to
    ScopTester.__init__.
    """
    # Assigned ahead of the base-class constructor call.
    self.mMinProfileSize = min_profile_size
    self.mMinLevel = min_level
    self.mMaxLevel = max_level
    self.mTableNameNeighbours = neighbours

    ScopTester.__init__(self, dbhandle, alignator, table_scop_test)

    # Neighbours table, named after the `neighbours` argument.
    # self.dbhandle is not set here; presumably provided by ScopTester.__init__.
    self.mTableNeighbours = TablePairsdbNeighbours(self.dbhandle)
    self.mTableNeighbours.SetName(self.mTableNameNeighbours)

    # lambda
    self.mBlastL = 0.3
    # The log-oddor is scaled by the same value.
    self.mLogOddorScaleFactor = self.mBlastL
    self.mLogOddor = alignlib.makeLogOddorDirichlet(
        self.mLogOddorScaleFactor
    )

    self.mMaxLinesMali = 1000
    self.mRegularizor = alignlib.makeRegularizorDirichletPrecomputed()
def startUp(self):
    """Create the alignlib objects, caches, counters and output stream.

    Does nothing when self.isComplete() is true.  Otherwise it:

    * installs encoder, log-oddor, regularizor and weightor on the alignlib
      default toolkit and keeps references on self,
    * opens the prebuilt profile library read-only when
      self.mUsePrebuiltProfiles is set (self.mProfileLibrary is None
      otherwise),
    * opens the indexed neighbourhood graph,
    * creates a local full dynamic-programming alignator from self.mGop and
      self.mGep,
    * opens the alignment output stream and writes the header line unless
      self.mContinueAt is set.
    """
    if self.isComplete():
        return

    ###############################################
    # create objects for algorithm
    toolkit_encoder = alignlib.getEncoder(alignlib.Protein20)
    alignlib.getDefaultToolkit().setEncoder(toolkit_encoder)
    self.mLogOddor = alignlib.makeLogOddorDirichlet(self.mScaleFactor)
    self.mRegularizor = alignlib.makeRegularizorDirichletPrecomputed()
    self.mWeightor = alignlib.makeWeightor()

    alignlib.getDefaultToolkit().setRegularizor(self.mRegularizor)
    alignlib.getDefaultToolkit().setLogOddor(self.mLogOddor)
    alignlib.getDefaultToolkit().setWeightor(self.mWeightor)

    if self.mUsePrebuiltProfiles:
        self.mProfileLibrary = ProfileLibrary.ProfileLibrary(
            self.mFilenameProfiles, "r")
        self.mProfileLibrary.setWeightor(self.mWeightor)
        self.mProfileLibrary.setLogOddor(self.mLogOddor)
        self.mProfileLibrary.setRegularizor(self.mRegularizor)
    else:
        self.mProfileLibrary = None

    # NOTE(review): created unconditionally so the attribute always exists;
    # confirm against the original indentation whether this belonged to the
    # else-branch above.
    self.mIndexedNeighbours = cadda.IndexedNeighbours(
        self.mFilenameGraph, self.mFilenameIndex)

    self.mChecker = self.checkLinkZScore
    self.mHeader = ("qdomain",
                    "sdomain",
                    "weight",
                    "passed",
                    "qstart",
                    "qend",
                    "qali",
                    "sstart",
                    "send",
                    "sali",
                    "score",
                    "naligned",
                    "ngaps",
                    "zscore")

    self.mAlignator = alignlib.makeAlignatorDPFull(
        alignlib.ALIGNMENT_LOCAL, self.mGop, self.mGep)

    # the cache to store alignandum objects
    self.mCache = {}

    alignlib.setDefaultEncoder(alignlib.getEncoder(alignlib.Protein20))

    # initialize counters
    self.mNPassed, self.mNFailed, self.mNNotFound = 0, 0, 0

    self.mOutfile = self.openOutputStream(self.mFilenameAlignments)

    # Only a fresh run gets a header line.  Identity comparison is the
    # correct test for None and cannot be affected by a custom __eq__.
    if self.mContinueAt is None:
        self.mOutfile.write("\t".join(self.mHeader) + "\n")
        self.mOutfile.flush()

    self.mStartTime = time.time()
def main():
    """Command line entry point for inspecting ADDA neighbours and profiles.

    The --method option selects the action:

    * view:    print the neighbours of every nid given on the command line.
    * pileup:  build and print the multiple alignment for the first argument.
    * profile: build and print the profile for the first argument; when the
               argument contains "_" it is parsed with AddaIO.toTuple and the
               profile is restricted to that segment.
    * align:   align the profiles of the first two arguments (both parsed
               with AddaIO.toTuple) and print a summary and the alignment.
    """
    parser = optparse.OptionParser(version="%prog version: $Id$", usage=USAGE)

    parser.add_option(
        "--method", dest="method", type="choice",
        choices=("view", "align", "pileup", "profile"),
        help="method to perform [default=%default].",
    )
    parser.add_option(
        "--mode", dest="mode", type="choice",
        choices=("global", "local"),
        help="alignment mode [default=%default].",
    )
    parser.add_option(
        "--gop", dest="gop", type="float",
        help="gap opening penalty [default=%default].",
    )
    parser.add_option(
        "--gep", dest="gep", type="float",
        help="gap extension penalty [default=%default].",
    )

    parser.set_defaults(
        filename_graph="adda.graph",
        filename_index="adda.graph.idx",
        method="view",
        filename_fasta="adda",
        filename_config="adda.ini",
        append=False,
        force=False,
        mode="local",
        gop=-10.0,
        gep=-1.0,
    )

    (options, args) = E.Start(parser)

    config = AddaIO.ConfigParser()
    config.read(os.path.expanduser(options.filename_config))

    index = cadda.IndexedNeighbours(options.filename_graph, options.filename_index)

    # Install the alignlib defaults used when building profiles.
    toolkit = alignlib.getDefaultToolkit()
    toolkit.setEncoder(alignlib.getEncoder(alignlib.Protein20))
    toolkit.setRegularizor(alignlib.makeRegularizorDirichletPrecomputed())
    toolkit.setLogOddor(alignlib.makeLogOddorDirichlet(0.3))
    toolkit.setWeightor(alignlib.makeWeightor())

    fasta = IndexedFasta.IndexedFasta(options.filename_fasta)
    align = AddaProfiles.AddaProfiles(config, fasta=fasta)

    if options.method == "view":
        # Convert the current argument; the previous code converted args[0]
        # on every iteration and so only ever showed the first nid.
        for arg in args:
            nid = int(arg)
            neighbours = index.getNeighbours(nid)
            for n in neighbours:
                # Parenthesised single argument: same output under the
                # Python 2 print statement and the Python 3 print function.
                print(str(n))

    elif options.method == "pileup":
        if "_" in args[0]:
            nid, start, end = AddaIO.toTuple(args[0])
        else:
            nid = int(args[0])
            start, end = None, None

        neighbours = index.getNeighbours(nid)
        mali = align.buildMali(nid, neighbours)
        options.stdout.write("%s\n" % str(mali))

    elif options.method == "profile":
        if "_" in args[0]:
            nid, start, end = AddaIO.toTuple(args[0])
        else:
            nid = int(args[0])
            start, end = None, None

        neighbours = index.getNeighbours(nid)
        mali = align.buildMali(nid, neighbours)
        prof = alignlib.makeProfile(mali)
        E.info("nid: %i, neighours=%i" % (nid, len(neighbours)))
        # Restrict to the requested segment only when one was given.
        if start is not None:
            prof.useSegment(start, end)
        prof.prepare()
        options.stdout.write("%s\n" % str(prof))

    elif options.method == "align":
        nid1, start1, end1 = AddaIO.toTuple(args[0])
        nid2, start2, end2 = AddaIO.toTuple(args[1])

        align = AddaProfiles.AddaProfiles(config, fasta=fasta)

        if options.mode == "local":
            mode = alignlib.ALIGNMENT_LOCAL
        else:
            mode = alignlib.ALIGNMENT_GLOBAL

        alignator = alignlib.makeAlignatorDPFull(mode, options.gop, options.gep)

        def _buildProfile(nid, start, end):
            """Return (sequence, prepared profile) for a segment of nid."""
            neighbours = index.getNeighbours(nid)
            mali = align.buildMali(nid, neighbours)
            prof = alignlib.makeProfile(mali)
            E.info("nid: %i, neighours=%i" % (nid, len(neighbours)))
            prof.useSegment(start, end)
            prof.prepare()
            seq = fasta.getSequence(nid)
            return alignlib.makeSequence(seq), prof

        seq1, prof1 = _buildProfile(nid1, start1, end1)
        seq2, prof2 = _buildProfile(nid2, start2, end2)

        result = alignlib.makeAlignmentVector()
        alignator.align(result, prof1, prof2)

        E.debug("%s\n" % str(result))

        options.stdout.write(
            "%s vs %s: score=%5.2f, length=%i, numgaps=%i, row_from=%i, row_to=%i, col_from=%i, col_to=%i\n"
            % (
                nid1,
                nid2,
                result.getScore(),
                result.getLength(),
                result.getNumGaps(),
                result.getRowFrom(),
                result.getRowTo(),
                result.getColFrom(),
                result.getColTo(),
            )
        )

        f = alignlib.AlignmentFormatExplicit(result, seq1, seq2)
        options.stdout.write("%s\n" % str(f))

    E.Stop()
class Checker:
    """Holds the database handles and alignment parameters read from sys.argv.

    The only method visible here is the constructor, which connects to the
    database, parses the command line and creates the alignlib and table
    helper objects.
    """

    # getopt short options.  "m:" is declared so that the "-m" branch handled
    # in __init__ is reachable; it used to be rejected by getopt.
    # NOTE(review): "c:" makes -c consume an argument although it is used as
    # a flag (its long form --no_cache takes none); left unchanged so existing
    # command lines keep parsing the same way.
    mShortOptions = "t:D:V:n:c:m:"
    # getopt long options; a trailing "=" marks options that take a value.
    mLongOptions = ["table=", "Database=", "Verbose=", "neighbours=",
                    "no_cache", "masks=", "table_masks="]

    def __init__(self):
        """Connect to pairsdb, parse sys.argv and set up helper objects.

        Exits the process with status 1 if the database connection fails and
        with status 2 (after printing USAGE) if the options cannot be parsed.
        """
        self.mTableNameDomains = None
        self.mDatabase = "pairsdb"
        self.mMinOverlapResidues = 20
        self.mMinCoverage = 0.2
        self.mMinOverlap = 0.2
        self.mDbhandle = Pairsdb()
        self.mLogLevel = 2
        self.mMask = 1
        self.mMethodsMask = (3, 4)
        self.mTableNameMasks = "nrdb90_masks"
        self.mTableNameSource = "pairsdb_90x90"
        self.mCache = 1

        if not self.mDbhandle.Connect():
            print("Connection failed")
            sys.exit(1)

        try:
            optlist, args = getopt.getopt(sys.argv[1:],
                                          self.mShortOptions,
                                          self.mLongOptions)
        except getopt.error:
            print(USAGE)
            sys.exit(2)

        for o, a in optlist:
            if o in ("-t", "--table"):
                self.mTableNameDomains = a
            elif o in ("-D", "--Database"):
                self.mDatabase = a
            elif o in ("-V", "--Verbose"):
                self.mLogLevel = int(a)
            elif o in ("-n", "--neighbours"):
                self.mTableNameSource = a
            elif o in ("-c", "--no_cache"):
                self.mCache = 0
            elif o in ("-m", "--masks"):
                # Comma-separated list of integers.
                self.mMethodsMask = [int(x) for x in a.split(",")]
            elif o == "--table_masks":
                self.mTableNameMasks = a

        self.mProfiles = {}
        self.mIsProfile = {}

        self.mDbhandle.UseDatabase(self.mDatabase)

        # alignment parameters
        self.mGop = -10.0
        self.mGep = -1.0
        self.mLogOddorType = "Rescaled"
        self.mBlastL = 0.3  # lambda
        self.mLogOddorScaleFactor = self.mBlastL
        self.mLogOddor = alignlib.makeLogOddorDirichlet(
            self.mLogOddorScaleFactor)
        self.mMaxLinesMali = 1000
        self.mRegularizor = alignlib.makeRegularizorDirichletPrecomputed()
        self.mWeightor = alignlib.makeNoWeightor()
        self.mFilter = None

        # minimum size for using a profile for alignments
        self.mMinProfileSize = 0

        # threshold parameters for significance check
        self.mMinAlignmentScore = 83.0
        self.mMinAlignmentMotifLength = 10

        self.mTableSource = TablePairsdbNeighbours(self.mDbhandle)
        self.mTableSource.SetName(self.mTableNameSource)

        self.mTableMasks = Table_nrdb90_masks(self.mDbhandle)
        self.mTableMasks.SetName(self.mTableNameMasks)

        self.mTableNrdb = Table_nrdb(self.mDbhandle)

        # Second connection through pairsdblib, using the credentials of the
        # handle opened above.
        self.mConnectionPairsdb = pairsdblib.Connection(
            self.mDbhandle.GetHost(),
            self.mDbhandle.GetUser(),
            self.mDbhandle.GetPassword(),
            self.mDbhandle.GetPort())
        self.mConnectionPairsdb.Connect(self.mDatabase)
def main(argv=None):
    """Script main: profile-align two multiple alignments and merge them.

    Parses command line options in sys.argv, unless *argv* is given.
    Exactly two positional arguments are required, each the path of a
    multiple alignment.  Both are read, converted to alignlib objects,
    aligned profile against profile, and the combined alignment is written
    to options.stdout.

    Raises ValueError if the number of positional arguments is not two.
    """
    if not argv:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(
        version="%prog version: $Id: cgat_script_template.py 2871 2010-03-03 10:20:44Z andreas $",
        usage=globals()["__doc__"])

    parser.add_option("-o", "--gop", dest="gop", type="float",
                      help="gap opening penalty [default=%default].")

    parser.add_option("-e", "--gep", dest="gep", type="float",
                      help="gap extension penalty [default=%default].")

    parser.add_option("-m", "--mode", dest="mode", type="choice",
                      choices=("global", "local"),
                      help="alignment mode, global=nw, local=sw [default=%default].")

    parser.set_defaults(
        gop=-12.0,
        gep=-2.0,
        format="fasta",
        mode="local",
    )

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    if len(args) != 2:
        raise ValueError("please supply two multiple alignments in FASTA format.")

    mali1 = Mali.Mali()
    mali2 = Mali.Mali()

    E.info("read 2 multiple alignments")

    # Close each input once it has been read; the handles were previously
    # left open until interpreter exit.
    for mali, filename in ((mali1, args[0]), (mali2, args[1])):
        infile = IOTools.openFile(filename, "r")
        try:
            mali.readFromFile(infile, format=options.format)
        finally:
            infile.close()

    cmali1 = Mali.convertMali2Alignlib(mali1)
    cmali2 = Mali.convertMali2Alignlib(mali2)

    if options.mode == "local":
        mode = alignlib.ALIGNMENT_LOCAL
    elif options.mode == "global":
        mode = alignlib.ALIGNMENT_GLOBAL

    alignator = alignlib.makeAlignatorDPFull(mode,
                                             options.gop, options.gep)

    # alignlib defaults are installed before the profiles are built.
    alignlib.setDefaultEncoder(alignlib.getEncoder(alignlib.Protein20))
    alignlib.setDefaultLogOddor(alignlib.makeLogOddorDirichlet(0.3))
    alignlib.setDefaultRegularizor(alignlib.makeRegularizorDirichletPrecomputed())

    cprofile1 = alignlib.makeProfile(cmali1)
    cprofile2 = alignlib.makeProfile(cmali2)

    result = alignlib.makeAlignmentVector()

    alignator.align(result, cprofile1, cprofile2)

    E.debug("result=\n%s" % alignlib.AlignmentFormatEmissions(result))

    # Merge the second alignment into the first along the computed alignment.
    cmali1.add(cmali2, result)

    outmali = Mali.convertAlignlib2Mali(
        cmali1,
        identifiers=mali1.getIdentifiers() + mali2.getIdentifiers())

    outmali.writeToFile(options.stdout, format=options.format)

    # write footer and output benchmark information.
    E.Stop()
def main():
    """Entry point: view neighbours, build a pileup/profile, or align two.

    --method chooses between:

    * view:    print the neighbours of each nid passed as an argument.
    * pileup:  print the multiple alignment built for the first argument.
    * profile: print the profile built for the first argument, restricted to
               a segment when the argument contains "_" (parsed with
               AddaIO.toTuple).
    * align:   align the profiles of the first two arguments (parsed with
               AddaIO.toTuple) and print summary statistics and the alignment.
    """
    parser = optparse.OptionParser(version="%prog version: $Id$", usage=USAGE)

    parser.add_option("--method", dest="method", type="choice",
                      choices=("view", "align", "pileup", "profile"),
                      help="method to perform [default=%default].")

    parser.add_option("--mode", dest="mode", type="choice",
                      choices=("global", "local"),
                      help="alignment mode [default=%default].")

    parser.add_option("--gop", dest="gop", type="float",
                      help="gap opening penalty [default=%default].")

    parser.add_option("--gep", dest="gep", type="float",
                      help="gap extension penalty [default=%default].")

    parser.set_defaults(
        filename_graph="adda.graph",
        filename_index="adda.graph.idx",
        method="view",
        filename_fasta="adda",
        filename_config="adda.ini",
        append=False,
        force=False,
        mode="local",
        gop=-10.0,
        gep=-1.0,
    )

    (options, args) = E.Start(parser)

    config = AddaIO.ConfigParser()
    config.read(os.path.expanduser(options.filename_config))

    index = cadda.IndexedNeighbours(options.filename_graph,
                                    options.filename_index)

    # alignlib toolkit defaults used for profile construction
    alignlib.getDefaultToolkit().setEncoder(
        alignlib.getEncoder(alignlib.Protein20))
    alignlib.getDefaultToolkit().setRegularizor(
        alignlib.makeRegularizorDirichletPrecomputed())
    alignlib.getDefaultToolkit().setLogOddor(
        alignlib.makeLogOddorDirichlet(0.3))
    alignlib.getDefaultToolkit().setWeightor(alignlib.makeWeightor())

    fasta = IndexedFasta.IndexedFasta(options.filename_fasta)
    align = AddaProfiles.AddaProfiles(config, fasta=fasta)

    if options.method == "view":
        for nid in args:
            # Use the loop variable: converting args[0] here repeated the
            # first nid once per argument.
            nid = int(nid)
            neighbours = index.getNeighbours(nid)

            for n in neighbours:
                # Valid as both a Python 2 print statement and a Python 3
                # print call, with identical output.
                print(str(n))

    elif options.method == "pileup":

        if "_" in args[0]:
            nid, start, end = AddaIO.toTuple(args[0])
        else:
            nid = int(args[0])
            start, end = None, None

        neighbours = index.getNeighbours(nid)
        mali = align.buildMali(nid, neighbours)
        options.stdout.write("%s\n" % str(mali))

    elif options.method == "profile":

        if "_" in args[0]:
            nid, start, end = AddaIO.toTuple(args[0])
        else:
            nid = int(args[0])
            start, end = None, None

        neighbours = index.getNeighbours(nid)
        mali = align.buildMali(nid, neighbours)
        prof = alignlib.makeProfile(mali)
        E.info("nid: %i, neighours=%i" % (nid, len(neighbours)))
        # A segment is applied only when the argument carried one.
        if start is not None:
            prof.useSegment(start, end)
        prof.prepare()
        options.stdout.write("%s\n" % str(prof))

    elif options.method == "align":

        nid1, start1, end1 = AddaIO.toTuple(args[0])
        nid2, start2, end2 = AddaIO.toTuple(args[1])

        align = AddaProfiles.AddaProfiles(config, fasta=fasta)

        if options.mode == "local":
            mode = alignlib.ALIGNMENT_LOCAL
        else:
            mode = alignlib.ALIGNMENT_GLOBAL

        alignator = alignlib.makeAlignatorDPFull(mode, options.gop, options.gep)

        def _buildProfile(nid, start, end):
            """Build the sequence object and prepared segment profile of nid."""
            neighbours = index.getNeighbours(nid)
            mali = align.buildMali(nid, neighbours)
            prof = alignlib.makeProfile(mali)
            E.info("nid: %i, neighours=%i" % (nid, len(neighbours)))
            prof.useSegment(start, end)
            prof.prepare()
            seq = fasta.getSequence(nid)
            return alignlib.makeSequence(seq), prof

        seq1, prof1 = _buildProfile(nid1, start1, end1)
        seq2, prof2 = _buildProfile(nid2, start2, end2)

        result = alignlib.makeAlignmentVector()

        alignator.align(result, prof1, prof2)

        E.debug("%s\n" % str(result))

        options.stdout.write(
            "%s vs %s: score=%5.2f, length=%i, numgaps=%i, row_from=%i, row_to=%i, col_from=%i, col_to=%i\n" %
            (nid1, nid2,
             result.getScore(),
             result.getLength(),
             result.getNumGaps(),
             result.getRowFrom(), result.getRowTo(),
             result.getColFrom(), result.getColTo()))

        f = alignlib.AlignmentFormatExplicit(result, seq1, seq2)
        options.stdout.write("%s\n" % str(f))

    E.Stop()