예제 #1
0
    def __init__(self, dbhandle, alignator, table_scop_test,
                 min_profile_size=20, min_level=30, max_level=90,
                 neighbours="pairsdb_90x90"):
        """Set up thresholds, the neighbours table and alignlib scoring objects.

        ``dbhandle``, ``alignator`` and ``table_scop_test`` are handed on
        unchanged to ``ScopTester.__init__``.  The remaining arguments are
        stored on the instance:

        * ``min_profile_size`` -> ``self.mMinProfileSize``
        * ``min_level``        -> ``self.mMinLevel``
        * ``max_level``        -> ``self.mMaxLevel``
        * ``neighbours``       -> ``self.mTableNameNeighbours`` (the name
          given to the ``TablePairsdbNeighbours`` object)
        """
        # These attributes are assigned before the base-class initialiser
        # runs, exactly as in the original ordering.
        self.mTableNameNeighbours = neighbours
        self.mMaxLevel = max_level
        self.mMinLevel = min_level
        self.mMinProfileSize = min_profile_size

        ScopTester.__init__(self, dbhandle, alignator, table_scop_test)

        # NOTE(review): self.dbhandle is presumably set by
        # ScopTester.__init__ -- confirm against the base class.
        neighbour_table = TablePairsdbNeighbours(self.dbhandle)
        self.mTableNeighbours = neighbour_table
        neighbour_table.SetName(self.mTableNameNeighbours)

        # lambda; also used as the scale factor of the log-oddor
        blast_lambda = 0.3
        self.mBlastL = blast_lambda
        self.mLogOddorScaleFactor = blast_lambda
        self.mLogOddor = alignlib.makeLogOddorDirichlet(blast_lambda)
        self.mMaxLinesMali = 1000
        self.mRegularizor = alignlib.makeRegularizorDirichletPrecomputed()
예제 #2
0
    def startUp(self):
        """Open the profile library for this slice and set its scoring defaults.

        Nothing happens when ``self.isComplete()`` is true.  With
        ``self.mAppend`` set, the library is opened in mode "a" and the last
        inserted key is remembered in ``self.mContinueAt``; otherwise it is
        opened in mode "w" with ``self.mForce`` passed on as ``force``.
        """
        if self.isComplete():
            return

        if not self.mAppend:
            library = ProfileLibrary.ProfileLibrary(
                self.mFilenameProfile + self.getSlice(),
                "w",
                force=self.mForce)
            self.mProfileLibrary = library
        else:
            library = ProfileLibrary.ProfileLibrary(
                self.mFilenameProfile + self.getSlice(),
                "a")
            self.mProfileLibrary = library
            last_key = library.getLastInsertedKey()
            self.mContinueAt = last_key
            self.info("processing will continue after %s" % str(last_key))

        # set default values
        log_oddor = alignlib.makeLogOddorDirichlet(self.mScaleFactor)
        library.setLogOddor(log_oddor)
        regularizor = alignlib.makeRegularizorDirichletPrecomputed()
        library.setRegularizor(regularizor)
        weightor = alignlib.makeWeightor()
        library.setWeightor(weightor)
        encoder = alignlib.getEncoder(alignlib.Protein20)
        alignlib.setDefaultEncoder(encoder)
예제 #3
0
    def startUp(self):
        """Open the profile library for this slice and set its scoring defaults.

        Nothing happens when ``self.isComplete()`` is true.  With
        ``self.mAppend`` set, the library is opened in mode "a" and the last
        inserted key is remembered in ``self.mContinueAt``; otherwise it is
        opened in mode "w" with ``self.mForce`` passed on as ``force``.
        """
        if self.isComplete():
            return

        if not self.mAppend:
            library = ProfileLibrary.ProfileLibrary(
                self.mFilenameProfile + self.getSlice(),
                "w",
                force=self.mForce)
            self.mProfileLibrary = library
        else:
            library = ProfileLibrary.ProfileLibrary(
                self.mFilenameProfile + self.getSlice(),
                "a")
            self.mProfileLibrary = library
            last_key = library.getLastInsertedKey()
            self.mContinueAt = last_key
            self.info("processing will continue after %s" % str(last_key))

        # set default values
        log_oddor = alignlib.makeLogOddorDirichlet(self.mScaleFactor)
        library.setLogOddor(log_oddor)
        regularizor = alignlib.makeRegularizorDirichletPrecomputed()
        library.setRegularizor(regularizor)
        weightor = alignlib.makeWeightor()
        library.setWeightor(weightor)
        encoder = alignlib.getEncoder(alignlib.Protein20)
        alignlib.setDefaultEncoder(encoder)
예제 #4
0
파일: ScopTester.py 프로젝트: BioXiao/cgat
    def __init__(self, dbhandle, alignator, table_scop_test,
                 min_profile_size=20, min_level=30, max_level=90,
                 neighbours="pairsdb_90x90"):
        """Set up thresholds, the neighbours table and alignlib scoring objects.

        ``dbhandle``, ``alignator`` and ``table_scop_test`` are handed on
        unchanged to ``ScopTester.__init__``.  The remaining arguments are
        stored on the instance:

        * ``min_profile_size`` -> ``self.mMinProfileSize``
        * ``min_level``        -> ``self.mMinLevel``
        * ``max_level``        -> ``self.mMaxLevel``
        * ``neighbours``       -> ``self.mTableNameNeighbours`` (the name
          given to the ``TablePairsdbNeighbours`` object)
        """
        # These attributes are assigned before the base-class initialiser
        # runs, exactly as in the original ordering.
        self.mTableNameNeighbours = neighbours
        self.mMaxLevel = max_level
        self.mMinLevel = min_level
        self.mMinProfileSize = min_profile_size

        ScopTester.__init__(self, dbhandle, alignator, table_scop_test)

        # NOTE(review): self.dbhandle is presumably set by
        # ScopTester.__init__ -- confirm against the base class.
        neighbour_table = TablePairsdbNeighbours(self.dbhandle)
        self.mTableNeighbours = neighbour_table
        neighbour_table.SetName(self.mTableNameNeighbours)

        # lambda; also used as the scale factor of the log-oddor
        blast_lambda = 0.3
        self.mBlastL = blast_lambda
        self.mLogOddorScaleFactor = blast_lambda
        self.mLogOddor = alignlib.makeLogOddorDirichlet(blast_lambda)
        self.mMaxLinesMali = 1000
        self.mRegularizor = alignlib.makeRegularizorDirichletPrecomputed()
예제 #5
0
    def startUp(self):
        """Prepare scoring objects, the profile source, counters and output.

        Returns immediately when ``self.isComplete()`` is true.  Otherwise:

        * creates the log-oddor, regularizor and weightor and registers them
          (plus the Protein20 encoder) with alignlib's default toolkit;
        * opens the prebuilt profile library read-only when
          ``self.mUsePrebuiltProfiles`` is set, or else opens the indexed
          neighbours graph and leaves ``self.mProfileLibrary`` as None;
        * creates a local dynamic-programming alignator from ``self.mGop``
          and ``self.mGep``;
        * resets the cache and the passed/failed/not-found counters;
        * opens the output stream and writes the header line unless
          processing continues from an earlier run
          (``self.mContinueAt`` is not None);
        * records the start time in ``self.mStartTime``.
        """
        if self.isComplete():
            return

        ###############################################
        # create objects for algorithm
        alignlib.getDefaultToolkit().setEncoder(
            alignlib.getEncoder(alignlib.Protein20))
        self.mLogOddor = alignlib.makeLogOddorDirichlet(self.mScaleFactor)
        self.mRegularizor = alignlib.makeRegularizorDirichletPrecomputed()
        self.mWeightor = alignlib.makeWeightor()

        alignlib.getDefaultToolkit().setRegularizor(self.mRegularizor)
        alignlib.getDefaultToolkit().setLogOddor(self.mLogOddor)
        alignlib.getDefaultToolkit().setWeightor(self.mWeightor)

        if self.mUsePrebuiltProfiles:
            self.mProfileLibrary = ProfileLibrary.ProfileLibrary(
                self.mFilenameProfiles, "r")
            self.mProfileLibrary.setWeightor(self.mWeightor)
            self.mProfileLibrary.setLogOddor(self.mLogOddor)
            self.mProfileLibrary.setRegularizor(self.mRegularizor)
        else:
            self.mProfileLibrary = None
            self.mIndexedNeighbours = cadda.IndexedNeighbours(
                self.mFilenameGraph, self.mFilenameIndex)

        self.mChecker = self.checkLinkZScore

        # column names of the output table, in output order
        self.mHeader = ("qdomain",
                        "sdomain",
                        "weight",
                        "passed",
                        "qstart",
                        "qend",
                        "qali",
                        "sstart",
                        "send",
                        "sali",
                        "score",
                        "naligned",
                        "ngaps",
                        "zscore")

        self.mAlignator = alignlib.makeAlignatorDPFull(
            alignlib.ALIGNMENT_LOCAL,
            self.mGop,
            self.mGep)

        # the cache to store alignandum objects
        self.mCache = {}

        alignlib.setDefaultEncoder(alignlib.getEncoder(alignlib.Protein20))

        # initialize counters
        self.mNPassed, self.mNFailed, self.mNNotFound = 0, 0, 0

        self.mOutfile = self.openOutputStream(self.mFilenameAlignments)

        # Identity test: only a genuinely unset continuation point triggers
        # the header, independent of how the key type implements ``==``.
        if self.mContinueAt is None:
            self.mOutfile.write("\t".join(self.mHeader) + "\n")
            self.mOutfile.flush()

        self.mStartTime = time.time()
예제 #6
0
def main():
    """Inspect ADDA neighbour lists, multiple alignments and profiles.

    The command line is parsed through ``E.Start``; ``--method`` selects
    what is done with the positional arguments:

    view
        print the neighbours of every nid given on the command line.
    pileup
        write the multiple alignment built from the neighbours of the
        first argument.
    profile
        write the profile built from that multiple alignment; if the
        argument contains ``_`` it is parsed with ``AddaIO.toTuple`` and
        the profile is restricted to the resulting segment.
    align
        build segment-restricted profiles for the first two arguments
        (both parsed with ``AddaIO.toTuple``), align them with the gap
        penalties and mode given by ``--gop``/``--gep``/``--mode`` and
        write the alignment summary.

    Raises:
        ValueError: if fewer positional arguments are supplied than the
            selected method needs.
    """

    parser = optparse.OptionParser(version="%prog version: $Id$", usage=USAGE)

    parser.add_option(
        "--method",
        dest="method",
        type="choice",
        choices=("view", "align", "pileup", "profile"),
        help="method to perform [default=%default].",
    )

    parser.add_option(
        "--mode",
        dest="mode",
        type="choice",
        choices=("global", "local"),
        help="alignment mode [default=%default].",
    )

    parser.add_option(
        "--gop",
        dest="gop",
        type="float",
        help="gap opening penalty [default=%default].",
    )

    parser.add_option(
        "--gep",
        dest="gep",
        type="float",
        help="gap extension penalty [default=%default].",
    )

    parser.set_defaults(
        filename_graph="adda.graph",
        filename_index="adda.graph.idx",
        method="view",
        filename_fasta="adda",
        filename_config="adda.ini",
        append=False,
        force=False,
        mode="local",
        gop=-10.0,
        gep=-1.0,
    )

    (options, args) = E.Start(parser)

    config = AddaIO.ConfigParser()
    config.read(os.path.expanduser(options.filename_config))

    index = cadda.IndexedNeighbours(options.filename_graph,
                                    options.filename_index)

    # register encoder and scoring objects with alignlib's default toolkit
    alignlib.getDefaultToolkit().setEncoder(
        alignlib.getEncoder(alignlib.Protein20))
    alignlib.getDefaultToolkit().setRegularizor(
        alignlib.makeRegularizorDirichletPrecomputed())
    alignlib.getDefaultToolkit().setLogOddor(
        alignlib.makeLogOddorDirichlet(0.3))
    alignlib.getDefaultToolkit().setWeightor(alignlib.makeWeightor())

    fasta = IndexedFasta.IndexedFasta(options.filename_fasta)
    align = AddaProfiles.AddaProfiles(config, fasta=fasta)

    def _requireArgs(count):
        """Fail with a readable message instead of an IndexError on args[i]."""
        if len(args) < count:
            raise ValueError(
                "method '%s' requires %i positional argument(s), got %i" %
                (options.method, count, len(args)))

    def _parseQuery(arg):
        """Return (nid, start, end); start and end are None for a bare nid."""
        if "_" in arg:
            return AddaIO.toTuple(arg)
        return int(arg), None, None

    if options.method == "view":
        for arg in args:
            # Convert the argument of this iteration; the previous code
            # always converted args[0] and so repeated the first nid.
            nid = int(arg)

            for n in index.getNeighbours(nid):
                # parenthesised form is valid in both Python 2 and 3
                print(str(n))

    elif options.method == "pileup":

        _requireArgs(1)
        # a segment, if given, is parsed but not applied to the pileup
        nid, _start, _end = _parseQuery(args[0])

        neighbours = index.getNeighbours(nid)
        mali = align.buildMali(nid, neighbours)
        options.stdout.write("%s\n" % str(mali))

    elif options.method == "profile":

        _requireArgs(1)
        nid, start, end = _parseQuery(args[0])

        neighbours = index.getNeighbours(nid)
        mali = align.buildMali(nid, neighbours)
        prof = alignlib.makeProfile(mali)
        E.info("nid: %i, neighours=%i" % (nid, len(neighbours)))
        if start is not None:
            prof.useSegment(start, end)
        prof.prepare()
        options.stdout.write("%s\n" % str(prof))

    elif options.method == "align":

        _requireArgs(2)
        nid1, start1, end1 = AddaIO.toTuple(args[0])
        nid2, start2, end2 = AddaIO.toTuple(args[1])

        # NOTE(review): this rebuilds the AddaProfiles object created above
        # with identical arguments; kept because the constructor's side
        # effects are not visible from here.
        align = AddaProfiles.AddaProfiles(config, fasta=fasta)

        if options.mode == "local":
            mode = alignlib.ALIGNMENT_LOCAL
        else:
            mode = alignlib.ALIGNMENT_GLOBAL

        alignator = alignlib.makeAlignatorDPFull(mode, options.gop,
                                                 options.gep)

        def _buildProfile(nid, start, end):
            """Return (sequence, prepared profile) for one nid segment."""
            neighbours = index.getNeighbours(nid)
            mali = align.buildMali(nid, neighbours)
            prof = alignlib.makeProfile(mali)
            E.info("nid: %i, neighours=%i" % (nid, len(neighbours)))
            prof.useSegment(start, end)
            prof.prepare()
            seq = fasta.getSequence(nid)
            return alignlib.makeSequence(seq), prof

        seq1, prof1 = _buildProfile(nid1, start1, end1)
        seq2, prof2 = _buildProfile(nid2, start2, end2)

        result = alignlib.makeAlignmentVector()

        alignator.align(result, prof1, prof2)

        E.debug("%s\n" % str(result))

        options.stdout.write(
            "%s vs %s: score=%5.2f, length=%i, numgaps=%i, row_from=%i, row_to=%i, col_from=%i, col_to=%i\n"
            % (
                nid1,
                nid2,
                result.getScore(),
                result.getLength(),
                result.getNumGaps(),
                result.getRowFrom(),
                result.getRowTo(),
                result.getColFrom(),
                result.getColTo(),
            )
        )

        f = alignlib.AlignmentFormatExplicit(result, seq1, seq2)
        options.stdout.write("%s\n" % str(f))

    E.Stop()
예제 #7
0
class Checker:
    """Database handles and alignment parameters configured from ``sys.argv``.

    Recognised options (see ``mShortOptions`` / ``mLongOptions``):

    ``-t/--table``       name stored in ``mTableNameDomains``
    ``-D/--Database``    database name (default "pairsdb")
    ``-V/--Verbose``     integer log level
    ``-n/--neighbours``  name of the neighbours table
    ``-c/--no_cache``    flag, sets ``mCache`` to 0
    ``-m/--masks``       comma-separated integers for ``mMethodsMask``
    ``--table_masks``    name of the masks table
    """

    # ``-c`` is a plain flag, like its long form ``--no_cache``; declaring
    # it as ``c:`` made getopt swallow the following token as its value.
    # ``-m`` takes a value like ``--masks=``; it was handled in __init__
    # but missing here, so it could never be parsed.
    mShortOptions = "t:D:V:n:cm:"
    mLongOptions = [
        "table=", "Database=", "Verbose=", "neighbours=", "no_cache", "masks=",
        "table_masks="
    ]

    def __init__(self):
        """Connect to the database, parse the command line, build helpers.

        Exits the process with status 1 if the database connection fails
        and with status 2 (after printing ``USAGE``) if the command line
        cannot be parsed.
        """

        self.mTableNameDomains = None
        self.mDatabase = "pairsdb"
        self.mMinOverlapResidues = 20
        self.mMinCoverage = 0.2
        self.mMinOverlap = 0.2
        self.mDbhandle = Pairsdb()
        self.mLogLevel = 2
        self.mMask = 1
        self.mMethodsMask = (3, 4)
        self.mTableNameMasks = "nrdb90_masks"
        self.mTableNameSource = "pairsdb_90x90"
        self.mCache = 1

        if not self.mDbhandle.Connect():
            print("Connection failed")
            sys.exit(1)

        try:
            optlist, args = getopt.getopt(sys.argv[1:], self.mShortOptions,
                                          self.mLongOptions)
        except getopt.error:
            print(USAGE)
            sys.exit(2)

        for o, a in optlist:
            if o in ("-t", "--table"):
                self.mTableNameDomains = a
            elif o in ("-D", "--Database"):
                self.mDatabase = a
            elif o in ("-V", "--Verbose"):
                self.mLogLevel = int(a)
            elif o in ("-n", "--neighbours"):
                self.mTableNameSource = a
            elif o in ("-c", "--no_cache"):
                self.mCache = 0
            elif o in ("-m", "--masks"):
                self.mMethodsMask = [int(x) for x in a.split(",")]
            elif o == "--table_masks":
                self.mTableNameMasks = a

        self.mProfiles = {}
        self.mIsProfile = {}

        self.mDbhandle.UseDatabase(self.mDatabase)

        # alignment parameters
        self.mGop = -10.0
        self.mGep = -1.0
        self.mLogOddorType = "Rescaled"
        self.mBlastL = 0.3  # lambda
        self.mLogOddorScaleFactor = self.mBlastL
        self.mLogOddor = alignlib.makeLogOddorDirichlet(
            self.mLogOddorScaleFactor)
        self.mMaxLinesMali = 1000
        self.mRegularizor = alignlib.makeRegularizorDirichletPrecomputed()
        self.mWeightor = alignlib.makeNoWeightor()
        self.mFilter = None

        # minimum size for using a profile for alignments
        self.mMinProfileSize = 0

        # threshold parameters for significance check
        self.mMinAlignmentScore = 83.0
        self.mMinAlignmentMotifLength = 10

        # table wrappers; names reflect any command line overrides above
        self.mTableSource = TablePairsdbNeighbours(self.mDbhandle)
        self.mTableSource.SetName(self.mTableNameSource)

        self.mTableMasks = Table_nrdb90_masks(self.mDbhandle)
        self.mTableMasks.SetName(self.mTableNameMasks)
        self.mTableNrdb = Table_nrdb(self.mDbhandle)

        # second connection via pairsdblib, reusing the handle's credentials
        self.mConnectionPairsdb = pairsdblib.Connection(
            self.mDbhandle.GetHost(), self.mDbhandle.GetUser(),
            self.mDbhandle.GetPassword(), self.mDbhandle.GetPort())

        self.mConnectionPairsdb.Connect(self.mDatabase)
예제 #8
0
def main(argv=None):
    """Align two multiple alignments as profiles and write the merged result.

    Options are parsed from *argv*; ``sys.argv`` is used when *argv* is
    empty or None.  Exactly two positional arguments (the files holding
    the multiple alignments) are required, otherwise ValueError is raised.
    """
    argv = argv or sys.argv

    # setup command line parser
    parser = E.OptionParser(
        version="%prog version: $Id: cgat_script_template.py 2871 2010-03-03 10:20:44Z andreas $",
        usage=globals()["__doc__"])

    parser.add_option(
        "-o", "--gop", dest="gop", type="float",
        help="gap opening penalty [default=%default].")

    parser.add_option(
        "-e", "--gep", dest="gep", type="float",
        help="gap extension penalty [default=%default].")

    parser.add_option(
        "-m", "--mode", dest="mode", type="choice",
        choices=("global", "local"),
        help="alignment mode, global=nw, local=sw [default=%default].")

    parser.set_defaults(gop=-12.0, gep=-2.0, format="fasta", mode="local")

    # add common options (-h/--help, ...) and parse command line
    options, args = E.Start(parser, argv=argv)

    if len(args) != 2:
        raise ValueError(
            "please supply two multiple alignments in FASTA format.")

    first_mali = Mali.Mali()
    second_mali = Mali.Mali()

    E.info("read 2 multiple alignments")

    first_mali.readFromFile(IOTools.openFile(args[0], "r"),
                            format=options.format)
    second_mali.readFromFile(IOTools.openFile(args[1], "r"),
                             format=options.format)

    first_cmali = Mali.convertMali2Alignlib(first_mali)
    second_cmali = Mali.convertMali2Alignlib(second_mali)

    # translate the textual mode into the alignlib constant
    if options.mode == "global":
        mode = alignlib.ALIGNMENT_GLOBAL
    elif options.mode == "local":
        mode = alignlib.ALIGNMENT_LOCAL

    alignator = alignlib.makeAlignatorDPFull(mode, options.gop, options.gep)

    # defaults used by alignlib when the profiles are built
    alignlib.setDefaultEncoder(alignlib.getEncoder(alignlib.Protein20))
    alignlib.setDefaultLogOddor(alignlib.makeLogOddorDirichlet(0.3))
    alignlib.setDefaultRegularizor(
        alignlib.makeRegularizorDirichletPrecomputed())

    first_profile = alignlib.makeProfile(first_cmali)
    second_profile = alignlib.makeProfile(second_cmali)

    alignment = alignlib.makeAlignmentVector()
    alignator.align(alignment, first_profile, second_profile)

    E.debug("result=\n%s" % alignlib.AlignmentFormatEmissions(alignment))

    # merge the second alignment into the first along the profile alignment
    first_cmali.add(second_cmali, alignment)

    merged = Mali.convertAlignlib2Mali(
        first_cmali,
        identifiers=first_mali.getIdentifiers() + second_mali.getIdentifiers())

    merged.writeToFile(options.stdout, format=options.format)

    # write footer and output benchmark information.
    E.Stop()
예제 #9
0
def main():
    """Inspect ADDA neighbour lists, multiple alignments and profiles.

    The command line is parsed through ``E.Start``; ``--method`` selects
    what is done with the positional arguments:

    view
        print the neighbours of every nid given on the command line.
    pileup
        write the multiple alignment built from the neighbours of the
        first argument.
    profile
        write the profile built from that multiple alignment; if the
        argument contains ``_`` it is parsed with ``AddaIO.toTuple`` and
        the profile is restricted to the resulting segment.
    align
        build segment-restricted profiles for the first two arguments
        (both parsed with ``AddaIO.toTuple``), align them with the gap
        penalties and mode given by ``--gop``/``--gep``/``--mode`` and
        write the alignment summary.

    Raises:
        ValueError: if fewer positional arguments are supplied than the
            selected method needs.
    """

    parser = optparse.OptionParser(version="%prog version: $Id$", usage=USAGE)

    parser.add_option("--method",
                      dest="method",
                      type="choice",
                      choices=("view", "align", "pileup", "profile"),
                      help="method to perform [default=%default].")

    parser.add_option("--mode",
                      dest="mode",
                      type="choice",
                      choices=("global", "local"),
                      help="alignment mode [default=%default].")

    parser.add_option("--gop",
                      dest="gop",
                      type="float",
                      help="gap opening penalty [default=%default].")

    parser.add_option("--gep",
                      dest="gep",
                      type="float",
                      help="gap extension penalty [default=%default].")

    parser.set_defaults(
        filename_graph="adda.graph",
        filename_index="adda.graph.idx",
        method="view",
        filename_fasta="adda",
        filename_config="adda.ini",
        append=False,
        force=False,
        mode="local",
        gop=-10.0,
        gep=-1.0,
    )

    (options, args) = E.Start(parser)

    config = AddaIO.ConfigParser()
    config.read(os.path.expanduser(options.filename_config))

    index = cadda.IndexedNeighbours(options.filename_graph,
                                    options.filename_index)

    # register encoder and scoring objects with alignlib's default toolkit
    alignlib.getDefaultToolkit().setEncoder(
        alignlib.getEncoder(alignlib.Protein20))
    alignlib.getDefaultToolkit().setRegularizor(
        alignlib.makeRegularizorDirichletPrecomputed())
    alignlib.getDefaultToolkit().setLogOddor(
        alignlib.makeLogOddorDirichlet(0.3))
    alignlib.getDefaultToolkit().setWeightor(alignlib.makeWeightor())

    fasta = IndexedFasta.IndexedFasta(options.filename_fasta)
    align = AddaProfiles.AddaProfiles(config, fasta=fasta)

    def _requireArgs(count):
        """Fail with a readable message instead of an IndexError on args[i]."""
        if len(args) < count:
            raise ValueError(
                "method '%s' requires %i positional argument(s), got %i" %
                (options.method, count, len(args)))

    def _parseQuery(arg):
        """Return (nid, start, end); start and end are None for a bare nid."""
        if "_" in arg:
            return AddaIO.toTuple(arg)
        return int(arg), None, None

    if options.method == "view":
        for arg in args:
            # Convert the argument of this iteration; the previous code
            # always converted args[0] and so repeated the first nid.
            nid = int(arg)

            for n in index.getNeighbours(nid):
                # parenthesised form is valid in both Python 2 and 3
                print(str(n))

    elif options.method == "pileup":

        _requireArgs(1)
        # a segment, if given, is parsed but not applied to the pileup
        nid, _start, _end = _parseQuery(args[0])

        neighbours = index.getNeighbours(nid)
        mali = align.buildMali(nid, neighbours)
        options.stdout.write("%s\n" % str(mali))

    elif options.method == "profile":

        _requireArgs(1)
        nid, start, end = _parseQuery(args[0])

        neighbours = index.getNeighbours(nid)
        mali = align.buildMali(nid, neighbours)
        prof = alignlib.makeProfile(mali)
        E.info("nid: %i, neighours=%i" % (nid, len(neighbours)))
        if start is not None:
            prof.useSegment(start, end)
        prof.prepare()
        options.stdout.write("%s\n" % str(prof))

    elif options.method == "align":

        _requireArgs(2)
        nid1, start1, end1 = AddaIO.toTuple(args[0])
        nid2, start2, end2 = AddaIO.toTuple(args[1])

        # NOTE(review): this rebuilds the AddaProfiles object created above
        # with identical arguments; kept because the constructor's side
        # effects are not visible from here.
        align = AddaProfiles.AddaProfiles(config, fasta=fasta)

        if options.mode == "local":
            mode = alignlib.ALIGNMENT_LOCAL
        else:
            mode = alignlib.ALIGNMENT_GLOBAL

        alignator = alignlib.makeAlignatorDPFull(mode, options.gop,
                                                 options.gep)

        def _buildProfile(nid, start, end):
            """Return (sequence, prepared profile) for one nid segment."""
            neighbours = index.getNeighbours(nid)
            mali = align.buildMali(nid, neighbours)
            prof = alignlib.makeProfile(mali)
            E.info("nid: %i, neighours=%i" % (nid, len(neighbours)))
            prof.useSegment(start, end)
            prof.prepare()
            seq = fasta.getSequence(nid)
            return alignlib.makeSequence(seq), prof

        seq1, prof1 = _buildProfile(nid1, start1, end1)
        seq2, prof2 = _buildProfile(nid2, start2, end2)

        result = alignlib.makeAlignmentVector()

        alignator.align(result, prof1, prof2)

        E.debug("%s\n" % str(result))

        options.stdout.write(
            "%s vs %s: score=%5.2f, length=%i, numgaps=%i, row_from=%i, row_to=%i, col_from=%i, col_to=%i\n"
            % (nid1, nid2,
               result.getScore(),
               result.getLength(),
               result.getNumGaps(),
               result.getRowFrom(), result.getRowTo(),
               result.getColFrom(), result.getColTo()))

        f = alignlib.AlignmentFormatExplicit(result, seq1, seq2)
        options.stdout.write("%s\n" % str(f))

    E.Stop()