Example #1
def checkListStatus( cl, update=0, force_keys=[], version=-1 ):
    """
    If the list is only to be updated, check that the list/sublist has
    entries that can be updated; if a non-updatable list is sent to
    the contactMaster, the calculation will halt.

    update     - 1||0, are we in update mode? if not, return the whole list
    force_keys - [str], the calculation is restricted to these keys
    """
    if not update:
        return cl

    ## all complexes where force_keys are missing or where values are None
    if version == -1:
        todo = [ c for c in cl
                 if None in c.values(keys=force_keys, default=None) ]
    else:
        todo = [ c[version] for c in cl
                 if None in c.values(keys=force_keys, default=None) ]

    ## add missing force keys with value None
    if force_keys:
        for c in todo:
            missing = [ k for k in force_keys if k not in c ]

            for k in missing:
                c[ k ] = None

    if len( todo ) > 0:
        msg = "\n%i out of %i complexes are to be updated\n\n"
        t.flushPrint( msg%(len(todo), len(cl) ) )
        return cl
    else:
        t.flushPrint( "\nList contains no data that can be updated\n" )
Example #3
    def go(self, dict):
        """
        Run Modeller job.

        @param dict: dictionary with run parameters
        @type  dict: {param:value} 
        """
        d = {}
        val = None

        try:

            T.flushPrint( self.params['progress_str'] )
            for id, val in dict.items():

                modeller_log = LogFile( '%s/Modeller.log' %val["outFolder"] )

                d[id] = val

                m = M( outFolder= val["outFolder"],
                       fasta_target=val["fastaTarget"], f_pir=val["f_pir"],
                       template_folder=val["template_folder"],
                       starting_model=val["starting_model"],
                       ending_model=val["ending_model"],
                       log=modeller_log )

                m.run()

        except Exception, why:
            self.reportError( 'ERROR '+str(why), val )
Example #4
def pairwise_cast( models ):
    """
    atom cast all models in list with each other
    -> modified list
    """
    for i in range( len( models ) ):

        for j in range( i+1, len( models) ):

            T.flushPrint('.')

            m1 = models[i]
            m2 = models[j]

            eq_res, eq_atm = m1.equals( m2 )

            if not (eq_res and eq_atm):

                i1, i2 = m1.compareAtoms( m2 )

                delta_1 = len( m1 ) - len( i1 )
                delta_2 = len( m2 ) - len( i2 )

                models[i].keep( i1 )
                models[j].keep( i2 )

                f1 = T.stripFilename( m1.sourceFile() )
                f2 = T.stripFilename( m2.sourceFile() )

                print "Removed %i atoms from %s" % (delta_1, f1)
                print "Removed %i atoms from %s" % (delta_2, f2)

    return models
Example #5
    def go(self, dict):
        """
        Run Modeller job.

        @param dict: dictionary with run parameters
        @type  dict: {param:value} 
        """
        d = {}
        val = None

        try:

            T.flushPrint(self.params['progress_str'])
            for id, val in dict.items():

                modeller_log = LogFile('%s/Modeller.log' % val["outFolder"])

                d[id] = val

                m = M(outFolder=val["outFolder"],
                      fasta_target=val["fastaTarget"],
                      f_pir=val["f_pir"],
                      template_folder=val["template_folder"],
                      starting_model=val["starting_model"],
                      ending_model=val["ending_model"],
                      log=modeller_log)

                m.run()

        except Exception, why:
            self.reportError('ERROR ' + str(why), val)
Example #6
def pairwise_cast(models):
    """
    atom cast all models in list with each other
    -> modified list
    """
    for i in range(len(models)):

        for j in range(i + 1, len(models)):

            T.flushPrint('.')

            m1 = models[i]
            m2 = models[j]

            eq_res, eq_atm = m1.equals(m2)

            if not (eq_res and eq_atm):

                i1, i2 = m1.compareAtoms(m2)

                delta_1 = len(m1) - len(i1)
                delta_2 = len(m2) - len(i2)

                models[i].keep(i1)
                models[j].keep(i2)

                f1 = T.stripFilename(m1.sourceFile())
                f2 = T.stripFilename(m2.sourceFile())

                print "Removed %i atoms from %s" % (delta_1, f1)
                print "Removed %i atoms from %s" % (delta_2, f2)

    return models
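pairwise_cast relies on PDBModel.compareAtoms to find the atom indices shared by two models. A toy sketch of the same idea on plain lists of atom names; compare_atoms below is a hypothetical stand-in, not the Biskit API:

def compare_atoms(names1, names2):
    ## hypothetical stand-in for PDBModel.compareAtoms: for each model,
    ## return the indices of the atoms present in both
    common = set(names1) & set(names2)
    i1 = [i for i, n in enumerate(names1) if n in common]
    i2 = [i for i, n in enumerate(names2) if n in common]
    return i1, i2

m1 = ['N', 'CA', 'C', 'O', 'CB']
m2 = ['N', 'CA', 'C', 'O']
i1, i2 = compare_atoms(m1, m2)
print("Removed %i atoms from m1" % (len(m1) - len(i1)))  ## prints: Removed 1 atoms from m1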
Example #7
    def go(self, cmplxDic):
        """
        Obtain contact matrix for all complexes.

        @param cmplxDic: dictionary of complexes::
                         {soln:Complex, soln:Complex, ...} 
        @type  cmplxDic: {int:Complex}

        @return: similar dictionary with Complex.info['soln'] as keys and
                 file names of matrices as value::
                 { soln : fname, soln : fname ....}
        @rtype: {int:str}

        """
        result = {}

        startTime = time.time()

        for soln, c in cmplxDic.items():
            T.flushPrint("%i," % soln)

            ##             if not os.path.exists( T.absfile('~/debug.dic') ):
            ##                 T.dump( cmplxDic,  T.absfile('~/debug.dic') )

            self.calcContacts(soln, c)

            self.calcInterfaceRms(soln, c)

            self.calcReducedContacts(soln, c)

            ## TODO: Prosa will not run when called via contactSlave, but runs as it
            ##        should when called as c.prosa2003Energy() in the interpreter.
            ##        What's wrong here? For now the Prosa calculation is skipped.
            ##
            self.calcProsa(soln, c)

            self.calcPairScore(soln, c)  ## uses res-contacts

            self.calcFoldX(soln, c)  ##uses rec/lig.info['foldX'] if available

            for method in ['cons_ent', 'cons_max', 'cons_abs']:
                self.calcConservation(soln, c, method)

            c['__version_contacter'] = self.version()

            result[soln] = c

            c.slim()


##             if not os.path.exists(T.absfile('~/debug_afterslave.dic') ):
##                 T.dump( cmplxDic,  T.absfile('~/debug_afterslave.dic') )


        print "\navg time for last %i complexes: %f s" %\
              ( len(cmplxDic), (time.time()-startTime)/len(cmplxDic))

        return result
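The timing report at the end of go() is a small reusable pattern in itself. A minimal sketch, assuming each item can be processed independently:

import time

def timed_batch(items, process):
    ## wall-clock the whole batch and report the average per item,
    ## mirroring the summary printed at the end of go() above
    start = time.time()
    for item in items:
        process(item)
    print("\navg time for last %i complexes: %f s"
          % (len(items), (time.time() - start) / len(items)))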
Example #8
    def go(self, cmplxDic):
        """
        Obtain contact matrix for all complexes.

        @param cmplxDic: dictionary of complexes::
                         {soln:Complex, soln:Complex, ...} 
        @type  cmplxDic: {int:Complex}

        @return: similar dictionary with Complex.info['soln'] as keys and
                 file names of matrices as value::
                 { soln : fname, soln : fname ....}
        @rtype: {int:str}

        """
        result = {}

        startTime = time.time()

        for soln, c in cmplxDic.items():
            T.flushPrint( "%i," % soln )

##             if not os.path.exists( T.absfile('~/debug.dic') ):
##                 T.dump( cmplxDic,  T.absfile('~/debug.dic') )

            self.calcContacts( soln, c )

            self.calcInterfaceRms( soln, c )

            self.calcReducedContacts( soln, c )

## TODO: Prosa will not run when called via contactSlave, but runs as it should
##        when called as c.prosa2003Energy() in the interpreter. What's wrong
##        here? For now the Prosa calculation is skipped.
##
            self.calcProsa( soln, c )

            self.calcPairScore( soln, c )  ## uses res-contacts

            self.calcFoldX( soln, c ) ##uses rec/lig.info['foldX'] if available

            for method in ['cons_ent', 'cons_max', 'cons_abs']:
                self.calcConservation( soln, c, method )

            c['__version_contacter'] = self.version()

            result[ soln ] = c

            c.slim()

##             if not os.path.exists(T.absfile('~/debug_afterslave.dic') ):
##                 T.dump( cmplxDic,  T.absfile('~/debug_afterslave.dic') )


        print "\navg time for last %i complexes: %f s" %\
              ( len(cmplxDic), (time.time()-startTime)/len(cmplxDic))

        return result
Example #9
def sloppyload( f ):
    """
    f - str, file name
    -> any, unpickled object
    """
    try:
        T.flushPrint( "Loading " + str(f) + '\n' )

        return T.load( T.absfile( f ) )

    except cPickle.UnpicklingError:

        print "Trying to load %s in sloppy mode..." % f
        return PickleUpgrader(open(T.absfile(f))).load()
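sloppyload falls back to Biskit's PickleUpgrader when a normal unpickle fails. A self-contained sketch of the same fallback pattern using only the standard pickle module; the module-renaming map is purely hypothetical:

import pickle

class RenamingUnpickler(pickle.Unpickler):
    ## hypothetical analogue of PickleUpgrader: remap modules that were
    ## renamed after the object was pickled
    RENAMED = {'OldTools': 'tools'}

    def find_class(self, module, name):
        module = self.RENAMED.get(module, module)
        return pickle.Unpickler.find_class(self, module, name)

def sloppy_load(fname):
    try:
        return pickle.load(open(fname, 'rb'))
    except pickle.UnpicklingError:
        print("Trying to load %s in sloppy mode..." % fname)
        return RenamingUnpickler(open(fname, 'rb')).load()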
Example #11
    def go(self, dict):
        """
        Run alignment job.

        @param dict: dictionary with run parameters
        @type  dict: {param:value} 
        """
        d = {}
        val = None

        try:

            T.flushPrint(self.progress_str)
            for id, val in dict.items():

                aligner_log = LogFile('%s/Aligner.log' % val["outFolder"])

                d[id] = val

                aligner_log.add('Slave aligns %s on %s' % (id, os.uname()[1]))

                a = Aligner(outFolder=val["outFolder"], log=aligner_log)

                ## For the cross validation
                if not os.path.exists(val["outFolder"] + TC.F_COFFEE):

                    input_file = val["outFolder"] + VS.F_TCOFFEE

                    alpha_path = self.prepareT_coffee(input_file)

                    a.align_for_modeller_inp(
                        pdbFiles=alpha_path,
                        fasta_templates=val["fastaTemplates"],
                        fasta_sequences=val["fastaSequences"],
                        fasta_target=val["fastaTarget"])

                ## For a classic project folder
                else:
                    a.align_for_modeller_inp(
                        pdbFiles=val["pdbFiles"],
                        fasta_templates=val["fastaTemplates"],
                        fasta_sequences=val["fastaSequences"],
                        fasta_target=val["fastaTarget"])

                a.go()

        except Exception, why:
            self.reportError('ERROR ' + str(why), val)
Example #12
    def go(self, dict):
        """
        Run alignment job.

        @param dict: dictionary with run parameters
        @type  dict: {param:value} 
        """
        d = {}
        val = None

        try:

            T.flushPrint( self.progress_str )
            for id, val in dict.items():

                aligner_log = LogFile( '%s/Aligner.log' %val["outFolder"] )

                d[id] = val

                aligner_log.add('Slave aligns %s on %s' % (id,os.uname()[1]) )

                a = Aligner( outFolder= val["outFolder"], log=aligner_log)

                ## For the cross validation
                if not os.path.exists(val["outFolder"] + TC.F_COFFEE):

                    input_file = val["outFolder"] + VS.F_TCOFFEE

                    alpha_path = self.prepareT_coffee(input_file)

                    a.align_for_modeller_inp( pdbFiles=alpha_path,
                              fasta_templates=val["fastaTemplates"],
                              fasta_sequences=val["fastaSequences"],
                              fasta_target=val["fastaTarget"])

                ## For a classic project folder    
                else:
                    a.align_for_modeller_inp(pdbFiles=val["pdbFiles"],
                              fasta_templates=val["fastaTemplates"],
                              fasta_sequences=val["fastaSequences"],
                              fasta_target=val["fastaTarget"])

                a.go()


        except Exception, why:
            self.reportError( 'ERROR '+str(why), val )
Example #13
    def go(self, dict):
        """
        Perform slave task.
        """
        d = {}

        T.flushPrint(self.params['progress_str'])
        for id, val in dict.items():

            d[id] = val + 1

            for i in range(1, 1000):
                f = 1.0 * i / 1200232112312.11

            T.flushPrint(str(id) + ' ')
            time.sleep(0.5)

        print "Done."
        return d
Example #14
    def go(self, dict):
        """
        Perform slave task.
        """
        d = {}

        T.flushPrint( self.params['progress_str'] )
        for id, val in dict.items():

            d[id] = val+1

            for i in range(1, 1000):
                f = 1.0 * i / 1200232112312.11

            T.flushPrint( str(id) + ' ' )
            time.sleep(0.5)

        print "Done."
        return d
Example #15
    def setHexComplexes(self, com_lst, n=20 ):
        """
        add contact matrices of hex-generated (wrong) complexes for comparison
        
        @param com_lst: ComplexList with contacts calculated
        @type  com_lst: ComplexList
        """
        t.flushPrint('adding hex-generated complexes')

        self.hexContacts = []

#        f = lambda a: molUtils.elementType(a['element']) == 'p'

        i = 0
        while i < n:

            com = com_lst[i]
            t.flushPrint('#')

            try:
                if com['fractNatCont'] == 0.0:  ## com['fnac_10'] == 0.0:
                    com.rec_model.remove( com.rec().maskH() )
                    com.lig_model.remove( com.lig_model.maskH() )

                    com.rec_model = com.rec_model.sort()
                    com.lig_model = com.lig_model.sort()

                    com.rec_model.keep( self.i_free_rec )
                    com.lig_model.keep( self.i_free_lig )
                    com.lig_transformed = None

                    self.hexContacts += [ com.atomContacts() ]

                else:
                    n += 1
                i+= 1
            except:
                print t.lastError()

        self.hexSurfaces = self.__categorizeHexSurf( 0.2 )
Example #16
    def go(self, jobs):
        """
        The calculation.
        
        @param jobs: dictionary with { int_id : str_protocol }
        @type  jobs: dict

        @return: result from AmberEntropist.run()
        @rtype: dict
        """
        result = {}

        startTime = time.time()

        for id, protocol in jobs.items():

            try:
                T.flushPrint("%s " % str(id))

                protocol.update({'nice': self.nice})

                x = None  ## free memory from previous run

                x = AmberEntropist(**protocol)

                x.run()

                r = x.result

                if r:
                    r['__version_AmberEntropist'] = x.version()

                    result[id] = r
                else:
                    result[id] = None

            except EntropistError, why:
                self.reportError(str(type(why)), id)
            except IOError, why:
                self.reportError(str(why), id)
Example #17
    def go(self, jobs):
        """
        The calculation.
        
        @param jobs: dictionary with { int_id : str_protocol }
        @type  jobs: dict

        @return: result from AmberEntropist.run()
        @rtype: dict
        """
        result = {}

        startTime = time.time()

        for id, protocol in jobs.items():

            try:
                T.flushPrint( "%s " % str(id) )

                protocol.update( {'nice':self.nice} )

                x = None  ## free memory from previous run

                x = AmberEntropist( **protocol )

                x.run()

                r = x.result

                if r:
                    r['__version_AmberEntropist'] = x.version()

                    result[ id ] = r
                else:
                    result[ id ] = None

            except EntropistError, why:
                self.reportError( str(type(why)), id )
            except IOError, why:
                self.reportError( str(why), id )
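Both versions of go() wrap each job in its own try/except so that one failing protocol is reported and skipped instead of aborting the whole batch. The pattern in miniature; run_one and report_error are placeholder callables:

def run_jobs(jobs, run_one, report_error):
    ## try each job separately; report failures and keep going
    result = {}
    for job_id, protocol in jobs.items():
        try:
            result[job_id] = run_one(protocol)
        except Exception as why:
            report_error('ERROR ' + str(why), job_id)
    return result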
Example #18
    def go(self, jobs):
        """
        Run job.
        
        @param jobs: { ((int,int),(int,int)) : (str, str) }, maps start and end
                      position of two chunks of coordinate frames to the
                      files where the two chunks are pickled.
        @type  jobs: {((int,int),(int,int)) : (str, str)}

        @return: the rms between the frames
        @rtype: [float]              
        """

        result = {}
        frames = None

        startTime = time.time()

        try:

            for i, frames in jobs.items():

                T.flushPrint( str(i) )

                f1 = self.__getFrames( frames[0])
                f2 = self.__getFrames( frames[1])

                result[ i ] = self.calcRmsd(i, f1, f2 )

            print "\navg time for last %i complexes: %f s" %\
                  ( len(jobs), (time.time()-startTime)/len(jobs))

        except IOError, why:
            self.reportError("Cannot open temporary frame file "+\
                             "(can happen if slave catches exit signal)",
                             frames )
Example #19
def reduceComplexList( cl ):
    """
    If cl comes from a multiple model docking, and HEX macrodocking was
    switched on, the list contains more than 512 solutions per model
    combination. Keep only the first 512 solutions.
    -> shortened or unchanged ComplexList
    """
    if len( cl ) != len(cl.models.recModels()) * len(cl.models.ligModels()) * 512:
        T.flushPrint('\nRemoving HEX solutions greater than 512.\n')
        len_old = len( cl )
        cl = cl.filter('soln', (0,512) )
        T.flushPrint('%i solutions removed.\n' % (len_old - len(cl) ) )
        T.flushPrint('%i solutions remain.\n' % len( cl ) )

    return cl
Example #20
def reduceComplexList(cl):
    """
    If cl comes from a multiple model docking, and HEX macrodocking was
    switched on, the list contains more than 512 solutions per model
    combination. Keep only the first 512 solutions.
    -> shortened or unchanged ComplexList
    """
    if len(cl) != len(cl.models.recModels()) * len(
            cl.models.ligModels()) * 512:
        T.flushPrint('\nRemoving HEX solutions greater than 512.\n')
        len_old = len(cl)
        cl = cl.filter('soln', (0, 512))
        T.flushPrint('%i solutions removed.\n' % (len_old - len(cl)))
        T.flushPrint('%i solutions remain.\n' % len(cl))

    return cl
Example #21
def reduceComplexList(cl):
    """
    If cl comes from a multiple model docking, and HEX macrodocking was
    switched on, the list contains more than 512 solutions per model
    combination. Keep only the first 512 solutions.
    -> shortened or unchanged ComplexList
    """
    if len(cl) == len(cl.recModels()) * len(cl.ligModels()) * 512:
        return cl

    if len(cl) < 512:
        print '\nNOTE: THE COMPLEX LIST IS SHORTER THAN 512 COMPLEXES'
        print '   COMPLEX LIST CONTAINS ONLY %i COMPLEXES\n' % len(cl)
        return cl

    t.flushPrint('\nRemoving HEX solutions greater than 512.\n')
    r = cl.filter('soln', (0, 512))
    t.flushPrint('%i solutions removed.\n' % (len(cl) - len(r)))
    t.flushPrint('%i solutions remain.\n' % len(r))

    return r
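All variants of reduceComplexList funnel through ComplexList.filter('soln', (0, 512)). A sketch of that range filter on a plain list of dicts, assuming the (lo, hi) tuple keeps entries whose value lies in the closed interval:

def filter_range(complexes, key, lo, hi):
    ## assumed semantics of ComplexList.filter(key, (lo, hi))
    return [c for c in complexes if lo <= c[key] <= hi]

cl = [{'soln': i} for i in range(1, 701)]
kept = filter_range(cl, 'soln', 0, 512)
print('%i solutions removed.' % (len(cl) - len(kept)))  ## 188 solutions removed.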
Example #22
def reduceComplexList( cl ):
    """
    If cl comes from a multiple model docking, and HEX macrodocking was
    switched on, the list contains more than 512 solutions per model
    combination. Keep only the first 512 solutions.
    -> shortened or unchanged ComplexList
    """
    if len( cl ) == len(cl.recModels()) * len(cl.ligModels()) * 512:
        return cl

    if len( cl ) < 512:
        print '\nNOTE: THE COMPLEX LIST IS SHORTER THAN 512 COMPLEXES'
        print '   COMPLEX LIST CONTAINS ONLY %i COMPLEXES\n'%len(cl)
        return cl

    t.flushPrint('\nRemoving HEX solutions greater than 512.\n')
    r = cl.filter('soln', (0,512) )
    t.flushPrint('%i solutions removed.\n' % (len( cl ) - len( r ) ) )
    t.flushPrint('%i solutions remain.\n' % len( r ) )

    return r
Example #23

###########################
# MAIN
###########################

if len(sys.argv) < 3:
    _use()

options = t.cmdDict(defaultOptions())

## ## current keys used for scoring
## scoreKeys = ['eProsa', 'ePairScore', 'foldX', 'cons_ent', 'cons_max']

## load docking solutions
t.flushPrint("\nLoading complex list %s ... " % t.absfile(options['i']))
complex_lst = t.load(options['i'])
t.flushPrint("done\n")

## validate and expand list of keys to be calculated
force = []
if options.has_key('f'):
    raw_force = t.toList(options['f'])

    ## check that the key is valid
    validKeys = [
        'fnac_4.5', 'fnac_10', 'fnrc_4.5', 'fnarc_9', 'fnarc_10', 'c_ratom_9',
        'c_ratom_10', 'eProsa', 'ePairScore', 'foldX', 'cons_ent', 'cons_max',
        'cons_abs', 'rms_if', 'rms_if_bb'
    ]
Example #24
## Find homologues to the target sequence using blast against "seq_db"
## Cluster the sequences and write the result to nr.fasta

## input: target.fasta
## 
## output: sequences/all.fasta
##                  /blast.out
##                  /cluster_result.out
##                  /nr.fasta                 (input for Aligner)

searcher = SequenceSearcher( outFolder=outFolder, clusterLimit=clustLim,
                             verbose=1, log=log )

if 'psi' in options:
    ## local PSIBlast - not fully implemented!!
    tools.flushPrint('Performing local PSI blast search\n')
    rounds = int( options['psi'] )
    searcher.localPSIBlast( f_target, seq_db, e=e, rounds=rounds,
                            alignments=aln, **ext_options )
else:
    ## if it looks like local Blast is installed
    tools.flushPrint('Performing local blast search\n')
    if os.environ.has_key('BLASTDB') and settings.blast_bin:
        searcher.localBlast( f_target, seq_db, 'blastp', alignments=aln,
                             e=e, **ext_options )
    ## try remote Blast   
    else:
        tools.flushPrint('Performing remote blast search\n')
        searcher.remoteBlast( f_target, seq_db, 'blastp', alignments=aln, 
                              e=e, **ext_options )
    
Example #25
    pdb      - PDB code to be stored in trajectory
"""
        sys.exit(0)

##########
## MAIN ##

use()

o = T.cmdDict( {'n':10} )

f_in  = T.absfile( o['i'] )
f_out = T.absfile( o.get('o', f_in) )
n = int( o['n'] )

T.flushPrint("Loading...")
t = T.load( f_in )

T.flushPrint("Converting %i frames..." % len(t) )

if isinstance(t, EnsembleTraj ):
    T.flushPrint( "Nothing to be done!\n")
    sys.exit(0)
    
t = traj2ensemble( t, n )
if 'pdb' in o:
    t.ref.pdbCode = o['pdb']

if f_in == f_out:
    os.rename( f_in, f_in + '_backup')
Example #26
        print 'Current directory is not a valid modeling project folder.' 
        _use( options )
        
    ## Try to add project folders
    ## look for default cross-validation projects
    d = []
    if os.path.exists( f + VS.F_RESULT_FOLDER ):
        d = [ f ]

    if options.has_key('d'):
        folders = T.toList(options['d'])
    else: 
        folders = d

                       
    T.flushPrint("Starting job...\n")

    for f in folders:
            a = A(outFolder=f)
            a.go()

    T.flushPrint("Done.\n")
    
    ## show result in PyMol
    if options.has_key('s'):
        p=Pymoler()
        p.addPdb( folders[0] + a.F_FINAL_PDB )
        p.add('color_b')
        p.add('select na, b<0')
        p.add('color grey, na')
        p.add('hide all')
Example #27
        'rdic': 'pcr_rec/models.dic'
    })

    if len(sys.argv) < 2:
        syntax()

    fs = [T.absfile(f) for f in T.toList(options['i'])]

    result = ComplexList()

    rec_dic = T.load(T.absfile(options['rdic']))
    lig_dic = T.load(T.absfile(options['ldic']))

    for f in fs:

        T.flushPrint('Loading %s ...' % f)

        cl = T.load(f)

        cl = reduceComplexList(cl)

        result += cl

    T.flushPrint('done\n')

    T.flushPrint('correct model numbers...')
    correct_model_numbers(result, rec_dic, lig_dic)

    T.flushPrint('\ncasting all rec models...')
    pairwise_cast(result.models.recModels())
Example #28
Default options:
"""
    
    for key, value in o.items():
        print "\t-",key, "\t",value

    sys.exit(0)


if __name__ == '__main__':

    options = T.cmdDict({'o':[ os.getcwd() ]})

    if '?' in options or 'help' in options:
        _use( options )
                       
    folders = T.toList( options['o'] )

    if not os.path.exists( folders[0] +'/templates'):
        print 'Current directory is not a valid modeling folder.' 
        _use( options )

    T.flushPrint( "Creating folders and links...\n" )
  
    for f in folders:
        sv = VS(outFolder=f)
        sv.go(f)

    T.flushPrint( "done\n" )
Example #29

###############
## SequenceSearcher
##
## Find homologues to the target sequence using blast against "seq_db"
## Cluster the sequences and write the result to nr.fasta

## input: target.fasta
## 
## output: sequences/all.fasta
##                  /blast.out
##                  /cluster_result.out
##                  /nr.fasta                 (input for Aligner)

tools.flushPrint('Searching for homologues ...')

try:
    # initiate 
    searcher = SequenceSearcher( outFolder=outFolder, verbose=1, log=log )

#    ## local PSIBlast - not fully implemented!!
#    searcher.localPSIBlast( target, seq_db, e=0.1, alignments=1000)

    ## local Blast
    searcher.localBlast( f_target, seq_db, 'blastp', alignments=500, e=0.0001 )

    ## cluster blast results. Defaults: simCut=1.75, lenCut=0.9, ncpu=1
    ## expects all.fasta
#    searcher.clusterFastaIterative( )
    searcher.clusterFasta() 
Example #30
    if not os.path.exists(f + VS.F_RESULT_FOLDER) and not options.has_key('d'):
        print 'Current directory is not a valid modeling project folder.'
        _use(options)

    ## Try to add project folders
    ## look for default cross-validation projects
    d = []
    if os.path.exists(f + VS.F_RESULT_FOLDER):
        d = [f]

    if options.has_key('d'):
        folders = T.toList(options['d'])
    else:
        folders = d

    T.flushPrint("Starting job...\n")

    for f in folders:
        a = A(outFolder=f)
        a.go()

    T.flushPrint("Done.\n")

    ## show result in PyMol
    if options.has_key('s'):
        p = Pymoler()
        p.addPdb(folders[0] + a.F_FINAL_PDB)
        p.add('color_b')
        p.add('select na, b<0')
        p.add('color grey, na')
        p.add('hide all')
Example #31
        _use(options)

    if not os.path.exists(f + B.F_INPUT_FOLDER) and not options.has_key('d'):
        print 'Current directory is not a valid modeling project folder.'
        _use(options)

    ## Try to add project folders
    ## look for default cross-validation projects
    d = []

    if os.path.exists(f + VS.F_RESULT_FOLDER):
        d = glob.glob(f + VS.F_RESULT_FOLDER + '/*')

    if options.has_key('d'):
        folders = T.toList(options['d'])
    else:
        folders = d

    reference = options.get('ref', None)

    model_list = options.get('modlist', None)

    T.flushPrint("Starting job...\n")

    for f in folders:
        T.flushPrint("\tWorking on %s\n" % os.path.split(f)[1])
        b = B(outFolder=f)
        b.go(model_list=model_list, reference=reference)

    T.flushPrint("Done.\n")
Example #32
if end:
    end = int( end )
step = int( o.get('step',1) )

ref = o.get('ref',None)
if ref:
    ref = PDBModel( T.absfile( ref ) )
    if 'prot' in o:
        ref = ref.compress( ref.maskProtein() )


result_xyz = []
result_frameNames = []
result_ref = None

T.flushPrint("Loading and appending trajectories...")
for i in range( len( inLst ) ):

    t = loadTraj( inLst[i], i, start, end, step )

    if 'prot' in o:
        t.keepAtoms( N0.nonzero(t.ref.maskProtein()) )

    result_ref = result_ref or ref or t.ref

    if t.ref.equals( result_ref ) != [1,1]:
        raise Exception( 'Incompatible reference structure.' )
    
    for xyz in t.frames:
        result_xyz.append( xyz.astype('f') )
Example #33
    except cPickle.UnpicklingError:

        print "Trying to load %s in sloppy mode..." % f
        return PickleUpgrader(open(T.absfile(f))).load()
    

#########
## Main
#########

__use()

fs = sys.argv[1:]

for f in fs:

    try:
        o = sloppyload( f )

        ## don't slim PDBModels that are their own source
        if isinstance( o, PDBModel ) and str( o.source ) == T.absfile( f ):
            o.forcePickle = 1
            
        T.flushPrint('Dumping %s\n' % f )
        T.dump( o, T.absfile( f ) )

    except:
        print "Error with ", f
        print T.lastError()
Example #34
## input: target.fasta
##
## output: sequences/all.fasta
##                  /blast.out
##                  /cluster_result.out
##                  /nr.fasta                 (input for Aligner)

searcher = SequenceSearcher(outFolder=outFolder,
                            clusterLimit=clustLim,
                            verbose=1,
                            log=log)

if 'psi' in options:
    ## local PSIBlast - not fully implemented!!
    tools.flushPrint('Performing local PSI blast search\n')
    rounds = int(options['psi'])
    searcher.localPSIBlast(f_target,
                           seq_db,
                           e=e,
                           rounds=rounds,
                           alignments=aln,
                           **ext_options)
else:
    ## if it looks like local Blast is installed
    tools.flushPrint('Performing local blast search\n')
    if os.environ.has_key('BLASTDB') and settings.blast_bin:
        searcher.localBlast(f_target,
                            seq_db,
                            'blastp',
                            alignments=aln,
Example #35
    def retrievePDBs(self, outFolder=None, pdbCodes=None):
        """
        Get the PDB from the local database if it exists; if not, try to
        download the coordinates from the RCSB.
        Write PDBs for given fasta records. Add PDB infos to internal
        dictionary of fasta records. NMR structures get resolution 3.5.

        @param outFolder: folder to put PDB files into (default: L{F_ALL})
        @type  outFolder: str OR None
        @param pdbCodes: list of PDB codes [all previously found templates]
        @type  pdbCodes: [str]

        @return: list of PDB file names
        @rtype: [str]

        @raise BlastError: if can't write file
        """
        outFolder = outFolder or self.outFolder + self.F_ALL
        pdbCodes = pdbCodes or self.record_dic.keys()
        result = []
        i = 0
        if not self.silent:
            T.flushPrint("fetching %i PDBs (l=local, r=remotely)..." % \
                         len( pdbCodes ) )

        for c in pdbCodes:

            i += 1

            fname = '%s/%s.pdb' % (outFolder, c)

            try:
                if os.path.exists(fname):
                    h = open(fname, 'r')
                else:
                    h = self.getLocalPDBHandle(c)
                if not self.silent:
                    T.flushPrint('l')
            except:
                h = self.getRemotePDBHandle(c)
                if not self.silent:
                    T.flushPrint('r')

            try:
                lines, infos = self.parsePdbFromHandle(h, first_model_only=1)
                infos['file'] = fname

                if c in self.record_dic:
                    self.record_dic[c].__dict__.update(infos)

                ## close if it is a handle
                try:
                    h.close()
                except:
                    pass

                if not os.path.exists(fname):
                    f = open(fname, 'w', 1)
                    f.writelines(lines)
                    f.close()

                result += [fname]

            except IOError, why:
                raise BlastError("Can't write file " + fname)
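retrievePDBs prefers a cached local copy and only falls back to a remote fetch. The same strategy in a standalone sketch; the RCSB download URL is an assumption for illustration, not Biskit's actual code path:

import os, sys

try:
    from urllib.request import urlopen   ## Python 3
except ImportError:
    from urllib2 import urlopen          ## Python 2

def open_pdb(code, local_dir):
    ## prefer a cached local file ('l'), otherwise download it ('r')
    fname = os.path.join(local_dir, code + '.pdb')
    if os.path.exists(fname):
        sys.stdout.write('l')
        return open(fname)
    sys.stdout.write('r')
    return urlopen('https://files.rcsb.org/download/%s.pdb' % code)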
Example #36
"""
        sys.exit(0)


##########
## MAIN ##

use()

o = T.cmdDict({'n': 10})

f_in = T.absfile(o['i'])
f_out = T.absfile(o.get('o', f_in))
n = int(o['n'])

T.flushPrint("Loading...")
t = T.load(f_in)

T.flushPrint("Converting %i frames..." % len(t))

if isinstance(t, EnsembleTraj):
    T.flushPrint("Nothing to be done!\n")
    sys.exit(0)

t = traj2ensemble(t, n)
if 'pdb' in o:
    t.ref.pdbCode = o['pdb']

if f_in == f_out:
    os.rename(f_in, f_in + '_backup')
Example #37
Default options:
"""

    for key, value in o.items():
        print "\t-", key, "\t", value

    sys.exit(0)


if __name__ == '__main__':

    options = T.cmdDict({'o': [os.getcwd()]})

    if '?' in options or 'help' in options:
        _use(options)

    folders = T.toList(options['o'])

    if not os.path.exists(folders[0] + '/templates'):
        print 'Current directory is not a valid modeling folder.'
        _use(options)

    T.flushPrint("Creating folders and links...\n")

    for f in folders:
        sv = VS(outFolder=f)
        sv.go(f)

    T.flushPrint("done\n")
Example #39
"""
Restart a distributed calculation.
Syntax:  restartPVM.py -i |rst_file| [-a]
Options:
         i  .. restart file containing result of TrackingJobMaster.getRst()
         a  .. add hosts to PVM
"""
        sys.exit(0)

## MAIN ##

use()

cmd = T.cmdDict()

T.flushPrint('Loading restart data...')
rst = T.load( cmd['i'] )

hosts = []
if 'a' in cmd:
    hosts = [ h['host'] for h in rst['hosts'] ]

master = restart( rst, hosts=hosts )

T.flushPrint('Master initialized for restart.')

master.start()



Example #40
    if not os.path.exists( f + B.F_INPUT_FOLDER ) and not options.has_key('d'):
        print 'Current directory is not a valid modeling project folder.' 
        _use( options )
        
    ## Try to add project folders
    ## look for default cross-validation projects
    d = []
    
    if os.path.exists( f + VS.F_RESULT_FOLDER ):
        d = glob.glob( f + VS.F_RESULT_FOLDER + '/*' )

    if options.has_key('d'):
        folders = T.toList(options['d'])
    else: 
        folders = d
    
    reference = options.get('ref', None)
        
    model_list = options.get('modlist', None)

                       
    T.flushPrint("Starting job...\n")

    for f in folders:
        T.flushPrint("\tWorking on %s\n"%os.path.split(f)[1])
        b = B( outFolder=f )
        b.go(model_list = model_list, reference = reference)

    T.flushPrint( "Done.\n")
Example #41
end = o.get('e', None)
if end:
    end = int(end)
step = int(o.get('step', 1))

ref = o.get('ref', None)
if ref:
    ref = PDBModel(T.absfile(ref))
    if 'prot' in o:
        ref = ref.compress(ref.maskProtein())

result_xyz = []
result_frameNames = []
result_ref = None

T.flushPrint("Loading and appending trajectories...")
for i in range(len(inLst)):

    t = loadTraj(inLst[i], i, start, end, step)

    if 'prot' in o:
        t.keepAtoms(N.nonzero(t.ref.maskProtein()))

    result_ref = result_ref or ref or t.ref

    if t.ref.equals(result_ref) != [1, 1]:
        raise Exception('Incompatible reference structure.')

    for xyz in t.frames:
        result_xyz.append(xyz.astype('f'))
Example #42
    if len(sys.argv) < 2:
        print \
"""
Restart a distributed calculation.
Syntax:  restartPVM.py -i |rst_file| [-a]
Options:
         i  .. restart file containing result of TrackingJobMaster.getRst()
         a  .. add hosts to PVM
"""
        sys.exit(0)


## MAIN ##

use()

cmd = T.cmdDict()

T.flushPrint('Loading restart data...')
rst = T.load(cmd['i'])

hosts = []
if 'a' in cmd:
    hosts = [h['host'] for h in rst['hosts']]

master = restart(rst, hosts=hosts)

T.flushPrint('Master initialized for restart.')

master.start()
Example #43
def randomSurfaces( base_folder, label, mask ):
    """
    calculate surfaces for all peptides and return the
    average and SD
    """
    ## container for results and standard deviations
    MS,    AS    = {}, {}
    MS_sd, AS_sd = {}, {}

    ## loop over peptide directories
    for k in MOU.aaAtoms.keys():
        dir = base_folder + 'GLY-%s-GLY_pcr/pcr_00'%(k)
        fLst = glob.glob( dir + '/*.pdb')
        
        msLst = []
        asLst = []
        
        ## loop over pdb files for each peptide
        T.flushPrint( '\nNow collecting data in %s'%dir )
        for f in fLst:

            ## load peptide and remove waters and hydrogens
            m = PDBModel( f )
            m = m.compress( m.maskProtein() * m.maskHeavy() )
            T.flushPrint( '.')

            ## add surface data
            try:
                d = PDBDope( m )
                d.addSurfaceRacer( probe=1.4 )

                ## remove trailing GLY
                m = m.compress( m.res2atomMask(mask) )
                
                ## collect surface data for each peptide
                msLst += [ m.profile('MS') ]
                asLst += [ m.profile('AS') ]
                       
            except:
                print 'Failed calculating exposure for GLY-%s-GLY'%(k)
                print '\t and file %s'%f
                
        ## get result dictionary for peptide
        T.flushPrint('\nCollecting data ...\n')
        msDic = {}
        asDic = {}
        msDic_sd = {}
        asDic_sd = {}

        j = 0
        #atoms =  [ a['name'] for a in m.atoms ]
        for n in m['name']:
            msDic[n]    = N0.average(msLst)[j]
            asDic[n]    = N0.average(asLst)[j]
            msDic_sd[n] = MAU.SD( msLst )[j]
            asDic_sd[n] = MAU.SD( asLst )[j]
            j += 1

        MS[ k ] = msDic
        AS[ k ] = asDic
        MS_sd[ k ] = msDic_sd
        AS_sd[ k ] = asDic_sd

    return MS, AS, MS_sd, AS_sd
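The bookkeeping at the end of randomSurfaces reduces a list of equally long per-atom profiles to a per-atom mean and standard deviation. The same reduction in a plain-Python sketch, without the Numeric/N0 helpers:

def profile_stats(profiles):
    ## column-wise mean and standard deviation over a list of
    ## equally long per-atom profiles
    n = float(len(profiles))
    means = [sum(col) / n for col in zip(*profiles)]
    sds = [(sum((x - m) ** 2 for x in col) / n) ** 0.5
           for col, m in zip(zip(*profiles), means)]
    return means, sds

print(profile_stats([[1.0, 2.0], [3.0, 4.0]]))  ## ([2.0, 3.0], [1.0, 1.0])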
Example #44
tmp_db = 'pdbaa'

###############
## SequenceSearcher
##
## Find homologues to the target sequence using blast against "seq_db"
## Cluster the sequences and write the result to nr.fasta

## input: target.fasta
##
## output: sequences/all.fasta
##                  /blast.out
##                  /cluster_result.out
##                  /nr.fasta                 (input for Aligner)

tools.flushPrint('Searching for homologues ...')

try:
    # initiate
    searcher = SequenceSearcher(outFolder=outFolder, verbose=1, log=log)

    #    ## local PSIBlast - not fully implemented!!
    #    searcher.localPSIBlast( target, seq_db, e=0.1, alignments=1000)

    ## local Blast
    searcher.localBlast(f_target, seq_db, 'blastp', alignments=500, e=0.0001)

    ## cluster blast results. Defaults: simCut=1.75, lenCut=0.9, ncpu=1
    ## expects all.fasta
    #    searcher.clusterFastaIterative( )
    searcher.clusterFasta()
Example #45
    def __init__( self, verbose=1, **options ):
        """
        @param verbose: verbosity level (default: 1)
        @type  verbose: 1|0        
        @param options: needs::
                         rec,lig - file name, receptor, ligand trajectories
                         ref     - file name, pickled reference complex
        @type  options: any

        @raise AnalyzeError: if atoms are not aligned
        """
        self.options = options

        if verbose: t.flushPrint("\nLoading...")
        self.t_lig = t.load( options['lig'] )
        self.t_rec = t.load( options['rec'] )
        self.com= t.load( options['ref'] )

        ## delete H from all players
        self.t_lig.removeAtoms( self.t_lig.getRef().maskH() )
        self.t_rec.removeAtoms( self.t_rec.getRef().maskH() )

        self.com.rec_model.remove( self.com.rec_model.maskH() )
        self.com.lig_model.remove( self.com.lig_model.maskH() )
        self.com.lig_transformed = None

        ## equalize atom content of free (trajectory) and bound models
        if verbose: t.flushPrint('\nCasting...')

        bnd_rec = self.com.rec()
        bnd_lig = self.com.lig_model

        self.t_rec.sortAtoms()
        self.t_lig.sortAtoms()

        bnd_rec = bnd_rec.sort()
        bnd_lig = bnd_lig.sort()

        #m_bnd_rec, m_t_rec = bnd_rec.equalAtoms( self.t_rec.getRef() )
        #m_bnd_lig, m_t_lig = bnd_lig.equalAtoms( self.t_lig.getRef() )

        i_bnd_rec, i_t_rec = bnd_rec.compareAtoms( self.t_rec.getRef() )
        i_bnd_lig, i_t_lig = bnd_lig.compareAtoms( self.t_lig.getRef() )

        #self.t_rec.removeAtoms( N0.logical_not( m_t_rec ) )
        #self.t_lig.removeAtoms( N0.logical_not( m_t_lig ) )

        self.t_rec = self.t_rec.takeAtoms( i_t_rec )
        self.t_lig = self.t_lig.takeAtoms( i_t_lig )

        #self.mask_free_lig = m_t_lig
        #self.mask_free_rec = m_t_rec
        self.i_free_lig = i_t_lig
        self.i_free_rec = i_t_rec

        #bnd_rec.remove( N0.logical_not( m_bnd_rec ) )
        #bnd_lig.remove( N0.logical_not( m_bnd_lig ) )
        bnd_rec = bnd_rec.take( i_bnd_rec )
        bnd_lig = bnd_lig.take( i_bnd_lig )

        #self.mask_bnd_rec = m_bnd_rec
        #self.mask_bnd_lig = m_bnd_lig
        self.i_bnd_rec = i_bnd_rec
        self.i_bnd_lig = i_bnd_lig

        ## put 'equalized' models back into ref complex
        self.com.rec_model = bnd_rec
        self.com.lig_model = bnd_lig
        self.com.lig_transformed = None

        ## check
        if not self.t_rec.getRef().equals( self.com.rec() )[1] or \
           not self.t_lig.getRef().equals( self.com.lig() )[1]:

            raise AnalyzeError('Atoms are not aligned.')

        ## native contact matrix
        self.contacts = self.com.atomContacts()

        self.hexContacts = None
Example #46
                          'ldic':'pcr_lig/models.dic',
                          'rdic':'pcr_rec/models.dic'} )

    if len( sys.argv ) < 2:
        syntax()

    fs = [ T.absfile( f ) for f in T.toList( options['i'] ) ]

    result = ComplexList()
    
    rec_dic = T.load( T.absfile( options['rdic'] ) )
    lig_dic = T.load( T.absfile( options['ldic'] ) )

    for f in fs:

        T.flushPrint('Loading %s ...' % f )

        cl = T.load( f )
        
        cl = reduceComplexList( cl )

        result += cl

    T.flushPrint('done\n')

    T.flushPrint('correct model numbers...')
    correct_model_numbers( result, rec_dic, lig_dic )

    T.flushPrint( '\ncasting all rec models...' )
    pairwise_cast( result.models.recModels() )
Example #47

###########################
# MAIN
###########################

if len(sys.argv) < 3:
    _use()

options = t.cmdDict( defaultOptions() )

## ## current keys used for scoring
## scoreKeys = ['eProsa', 'ePairScore', 'foldX', 'cons_ent', 'cons_max']

## load docking solutions
t.flushPrint( "\nLoading complex list %s ... " % t.absfile( options['i'] ) )
complex_lst = t.load( options['i'] )
t.flushPrint( "done\n" )

## validate and expand list of keys to be calculated
force = []
if options.has_key('f'):
    raw_force = t.toList( options['f'] )

    ## check that the key is valid
    validKeys = ['fnac_4.5', 'fnac_10', 'fnrc_4.5',
                 'fnarc_9', 'fnarc_10', 'c_ratom_9', 'c_ratom_10',
                 'eProsa', 'ePairScore', 'foldX',
                 'cons_ent', 'cons_max', 'cons_abs',
                 'rms_if', 'rms_if_bb']
Example #48
def randomSurfaces( base_folder, label, mask ):
    """
    calculate surfaces for all peptides and return the
    average and SD
    """
    ## container for results and standard deviations
    MS,    AS    = {}, {}
    MS_sd, AS_sd = {}, {}

    ## loop over peptide directories
    for k in MOU.aaAtoms.keys():
        dir = base_folder + 'GLY-%s-GLY_pcr/pcr_00'%(k)
        fLst = glob.glob( dir + '/*.pdb')
        
        msLst = []
        asLst = []
        
        ## loop over pdb files for each peptide
        T.flushPrint( '\nNow collecting data in %s'%dir )
        for f in fLst:

            ## load peptide and remove waters and hydrogens
            m = PDBModel( f )
            m = m.compress( m.maskProtein() * m.maskHeavy() )
            T.flushPrint( '.')

            ## add surface data
            try:
                d = PDBDope( m )
                d.addSurfaceRacer( probe=1.4 )

                ## remove trailing GLY
                m = m.compress( m.res2atomMask(mask) )
                
                ## collect surface data for each peptide
                msLst += [ m.profile('MS') ]
                asLst += [ m.profile('AS') ]
                       
            except:
                print 'Failed calculating exposure for GLY-%s-GLY'%(k)
                print '\t and file %s'%f
                
        ## get result dictionary for peptide
        T.flushPrint('\nCollecting data ...\n')
        msDic = {}
        asDic = {}
        msDic_sd = {}
        asDic_sd = {}

        j = 0
        #atoms =  [ a['name'] for a in m.atoms ]
        for n in m['name']:
            msDic[n]    = N.average(msLst)[j]
            asDic[n]    = N.average(asLst)[j]
            msDic_sd[n] = MAU.SD( msLst )[j]
            asDic_sd[n] = MAU.SD( asLst )[j]
            j += 1

        MS[ k ] = msDic
        AS[ k ] = asDic
        MS_sd[ k ] = msDic_sd
        AS_sd[ k ] = asDic_sd

    return MS, AS, MS_sd, AS_sd
Example #49
    def retrievePDBs( self, outFolder=None, pdbCodes=None ):
        """
        Get the PDB from the local database if it exists; if not, try to
        download the coordinates from the RCSB.
        Write PDBs for given fasta records. Add PDB infos to internal
        dictionary of fasta records. NMR structures get resolution 3.5.

        @param outFolder: folder to put PDB files into (default: L{F_ALL})
        @type  outFolder: str OR None
        @param pdbCodes: list of PDB codes [all previously found templates]
        @type  pdbCodes: [str]

        @return: list of PDB file names
        @rtype: [str]

        @raise BlastError: if can't write file
        """
        outFolder = outFolder or self.outFolder + self.F_ALL
        pdbCodes = pdbCodes or self.record_dic.keys()
        result = []
        i = 0
        if not self.silent:
            T.flushPrint("fetching %i PDBs (l=local, r=remotely)..." % \
                         len( pdbCodes ) )

        for c in pdbCodes:

            i += 1

            fname = '%s/%s.pdb' % (outFolder, c)

            try:
                if os.path.exists( fname ):
                    h = open( fname, 'r' )
                else:
                    h = self.getLocalPDBHandle( c )
                if not self.silent:
                    T.flushPrint('l')
            except:
                h = self.getRemotePDBHandle( c )
                if not self.silent:
                    T.flushPrint('r')

            try:
                lines, infos = self.parsePdbFromHandle( h, first_model_only=1 )
                infos['file'] = fname

                if c in self.record_dic:
                    self.record_dic[ c ].__dict__.update( infos )

                ## close if it is a handle
                try: h.close()
                except:
                    pass

                if not os.path.exists( fname ):
                    f = open( fname, 'w', 1 )
                    f.writelines( lines )
                    f.close()

                result += [ fname ]

            except IOError, why:
                raise BlastError( "Can't write file "+fname )