Example #1
0
def analyzeXplorLog(logFile, extraIgnoreLineList=None, extraIgnoreCountList=None):
    '''
    Analyze an Xplor log file and tally its lines by severity.

    @param logFile: path of the Xplor log file to analyze.
    @param extraIgnoreLineList: extra error-line substrings to ignore
           (currently unused in this body -- TODO confirm intended use).
    @param extraIgnoreCountList: expected counts for the above
           (currently unused in this body -- TODO confirm intended use).

    Returns [timeTaken, crashed, nr_error, nr_warning, nr_message, nr_debug]
             0          1        2         3           4           5
    Return None on error.

    The numbers of lines should add up to the total number of lines.
    For a specific other log file type look at the example in cing.Scripts.FC.utils
    '''
    # Use None sentinels instead of mutable default arguments ([]), which are
    # shared between calls in Python and a classic source of bugs.
    if extraIgnoreLineList is None:
        extraIgnoreLineList = []
    if extraIgnoreCountList is None:
        extraIgnoreCountList = []
    result = [None, None, 0, 0, 0, 0]
    if not logFile:
        nTerror("logFile %s was not specified." % logFile)
        return None
    if not os.path.exists(logFile):
        nTerror("logFile %s was not found." % logFile)
        return None

    for r in AwkLike(logFile):
        line = r.dollar[0]
        lineLower = line.lower()
        if line.startswith(' %') and line.count('ERR'):
            # Check this error line against the module-level ignore list.
            toIgnore = False
            for ignoreLine in ignoreLineXplorList:
                ignoreLineLower = ignoreLine.lower()
                if lineLower.count(ignoreLineLower):
                    toIgnore = True
                    break  # finding one is enough
                # end if
            # end for
            if toIgnore:
                # Ignored error lines are counted as plain messages.
                result[4] += 1
                continue
            # 'eeeeeeror' is deliberately misspelled -- presumably so this
            # output line is not itself counted as an error when this log is
            # analyzed in turn; confirm before "fixing" the spelling.
            nTwarning("Found eeeeeeror in line: %s" % line)
            result[2] += 1
        elif line.startswith(' %') and line.count('WRN'):
            nTdebug("Found warning in line: %s" % line)
            result[3] += 1
        elif line.startswith(prefixDebug):
            nTdebug("Found debug in line: %s" % line)
            result[5] += 1
        else:
            result[4] += 1
            if line.count('total CPU time='):
                # Time taken is the last whitespace-separated field on the line.
                timeTakenStr = r.dollar[r.NF - 1]
                result[0] = float(timeTakenStr)
            elif line.count('Program execution will be terminated'):
                nTdebug("Matched termination on line: %s" % line)
                result[1] = True
            # end elif
        # end else
    return result
Example #2
0
 def getPostscriptFileNames(self):
     """
     Return a NTlist with (postscriptFileName, description) tuples
     """
     listingPath = os.path.join(self.rootPath, 'postscriptFiles.lis')
     fileList = NTlist()
     # Bail out early with a warning (and an empty list) when the listing
     # file is absent.
     if not os.path.exists(listingPath):
         nTwarning("Failed to find %s in getPostscriptFileNames" % listingPath)
         return fileList
     for record in AwkLike(listingPath, separator=':'):
         fileName = record.dollar[2][:-1].strip()
         description = record.dollar[1][:-6].strip()
         fileList.append((fileName, description))
     return fileList
Example #3
0
def importCyanaStereoFile( project, stereoFileName, convention ):
    """Import stereo assignments from CYANA
       return project or None on error.

CYANA stereo file:

var info echo
echo:=off
info:=none
atom stereo "HB2  HB3   509"   # GLU-
atom stereo "QG1  QG2   511"   # VAL
atom stereo "HB2  HB3   513"   # HIS
atom stereo "QG1  QG2   514"   # VAL
atom stereo "HG2  HG3   516"   # GLU-
atom stereo "HA1  HA2   519"   # GLY

    """
    if project.molecule is None:
        return None

    molecule = project.molecule
    atomDict = molecule.getAtomDict(convention)
    count = 0
    for line in AwkLike( stereoFileName, minNF=5 ):
        if line.dollar[1] == 'atom' and line.dollar[2] == 'stereo':
            resnum = int(line.dollar[5].strip('"'))
            # Fields 3 and 4 hold the two atom names of the stereo pair.
            for i in [3, 4]:
                atm = None
                t = (resnum, line.dollar[i].strip('"'))
                if t in atomDict:  # 'in' instead of deprecated dict.has_key()
                    atm = atomDict[t]
                if atm is None:
                    nTerror('importCyanaStereoFile: atom %s; line %d (%s)\n', line.dollar[i], line.NR, line.dollar[0] )
                else:
                    atm.stereoAssigned = True
                    count += 1
                    # Methyls: the carbon is implicit in the CYANA definitions,
                    # so mark the attached heavy atom as well.
                    # NOTE(review): original comment said "Val, Ile" but the
                    # code checks VAL and LEU -- confirm the intended set.
                    if atm.residue.db.name in ['VAL', 'LEU'] and atm.isMethylProton():
                        heavy = atm.heavyAtom()
                        heavy.stereoAssigned = True
                        count += 1
                    #end if
                #end if
            #end for
        #end if
    #end for
    nTmessage('==> Derived %d stereo assignments from "%s"', count, stereoFileName )
    return project
Example #4
0
    def readFile(self, tabFile):
        """
        Read an nmrPipe table from tabFile and populate remarks, column
        definitions, DATA entries and data rows of this table.
        """
        for rec in AwkLike(tabFile, minNF=1, commentString='#'):
            keyword = rec.dollar[1]
            if keyword == 'REMARK' and rec.NF > 1:
                self.remarks.append(rec.dollar[2:])
            elif keyword == 'VARS':
                # Each remaining field names one column.
                for varName in rec.dollar[2:]:
                    self.addColumn(name=varName)
            elif keyword == 'FORMAT':
                # Format fields line up positionally with the VARS columns.
                for idx, fmtField in enumerate(rec.dollar[2:]):
                    self.columnDefs[idx].fmt = fmtField
            elif keyword == 'DATA' and rec.NF > 3:
                self.data[rec.dollar[2]] = rec.dollar[3:]
            elif rec.NF == len(self.columnDefs):
                # A data row: one field per defined column.
                row = self.addRow()
                for idx in range(rec.NF):
                    colDef = self.columnDefs[idx]
                    rawValue = rec.dollar[idx + 1]
                    if rawValue == self.noneIndicator:
                        row[colDef.name] = None
                    else:
                        # Derive the conversion function from the last
                        # character of the printf-style format field.
                        lastChar = colDef.fmt[-1:]
                        if lastChar in ['f', 'e', 'E', 'g', 'G']:
                            convert = float
                        elif lastChar in ['d', 'o', 'x', 'X']:
                            convert = int
                        else:
                            convert = str
                        row[colDef.name] = convert(rawValue)
            else:
                # Unrecognized line; silently skipped as before.
                pass
        self.tabFile = tabFile
Example #5
0
def parseShiftxOutput( fileName, molecule, chainId ):
    """
    Parse shiftx generated output (gv_version!).
    Store result in shiftx attribute (which is a NTlist type) of each atom

format file:

# Entries marked with a * may have inaccurate shift predictions.
# Entries marked with a value < -600 should be ignored
  501   H  N      116.3173
  501   H  CA      55.4902
  501   H  CB      29.9950
  501   H  C      169.8446
  501   H  H        8.4401
  or in 1y4o:
  1     G  N      109.7404
  1     G  CA      45.2787
  or in 1afp
  10    K  HZ3      3.7795 # A HZ3 that might not be present.


    Return True on error; eg. when the file is absent.
    """
    if not os.path.exists(fileName):
        nTerror("Failed to find %s" % fileName)
        return True

    atomDict = molecule.getAtomDict(IUPAC, chainId)

    for line in AwkLike( fileName, commentString = '#', minNF = 4 ):
        # Exact float comparison against the -666.000 sentinel; assumes shiftx
        # writes the sentinel verbatim -- TODO confirm.
        if line.float(4) != -666.000:
            # A leading '*' flags an inaccurate prediction; strip it.
            lineCol1 = int(line.dollar[1].strip('*'))
            if chainId is not None:
                atm = molecule.decodeNameTuple( (IUPAC, chainId, lineCol1, line.dollar[3]) )
            else:
                atm = None
                key = (lineCol1, line.dollar[3])
                if key in atomDict:  # 'in' instead of deprecated dict.has_key()
                    atm = atomDict[key]
            #end if
            if not atm:
                # Happens for all LYS without HZ3; deliberately not reported.
                pass
            else:
                atm.shiftx.append( line.float(4) )
Example #6
0
def analyzeWattosLog(logFile):
    """
    Scan a Wattos log file and tally its lines by severity.

    Returns [timeTaken, crashed, nr_error, nr_warning, nr_message, nr_debug]
    Return None on error.

    The numbers of lines should add up to the total number of lines.
    For a specific other log file type look at the example in cing.Scripts.FC.utils
    """
    if not logFile:
        nTerror("logFile %s was not specified." % logFile)
        return None
    if not os.path.exists(logFile):
        nTerror("logFile %s was not found." % logFile)
        return None

    summary = [None, None, 0, 0, 0, 0]
    for record in AwkLike(logFile):
        text = record.dollar[0]
        if text.startswith(prefixError):
            summary[2] += 1
            continue
        if text.startswith(prefixWarning):
            summary[3] += 1
            continue
        if text.startswith(prefixDebug):
            summary[5] += 1
            continue
        summary[4] += 1
        # Wattos took (#ms): 2332
        if text.startswith('Wattos took'):  # TODO: check.
            nTdebug("Matched line: %s" % text)
            lastField = record.dollar[record.NF - 1]
            summary[0] = float(lastField)
            if summary[0]:
                summary[0] /= 1000.  # get seconds
            nTdebug("Found time: %s" % lastField)
        elif text.startswith('Exception in thread'):  # TODO: check.
            nTdebug("Matched line: %s" % text)
            summary[1] = True
    return summary
Example #7
0
    def parseResult(self):
        """
        Get summary.

        Parse procheck .rin files and store the result in the procheck NTdict
        of each residue of the molecule.
        """
        path = os.path.join(self.rootPath, sprintf('%s.sum',
                                                   self.molecule.name))
        # open() raises IOError on a missing file and never returns a falsy
        # object, so the previous "if not fp" check could never report the
        # error; use try/except instead.
        try:
            fp = open(path, 'r')
        except IOError:
            nTerror('gvProcheck.parseResult: %s not found', path)
        else:
            self.summary = ''.join(fp.readlines())
            fp.close()
        #end if

        for i in range(1, self.molecule.modelCount + 1):
            # One .rin file per model, numbered _001, _002, ...
            path = os.path.join(self.rootPath,
                                sprintf('%s_%03d.rin', self.molecule.name, i))

            for line in AwkLike(path, minLength=64, commentString="#"):
                result = self._parseProcheckLine(line.dollar[0])
                chain = result['chain']
                resNum = result['resNum']
                residue = self.molecule.decodeNameTuple(
                    (cing.PDB, chain, resNum, None))
                if not residue:
                    nTerror('Procheck.parseResult: residue not found (%s,%d)',
                            chain, resNum)
                else:
                    # Accumulate one value per model in an NTlist per field.
                    residue.setdefault('procheck', NTstruct())
                    for field, value in result.iteritems():
                        residue.procheck.setdefault(field, NTlist())
                        residue.procheck[field].append(value)
                    #end for
                #end if
                del (result)
Example #8
0
def initBMRB(project, bmrbFile, moleculeName=None):
    """
    Initialize a Molecule from an edited BMRB file.

    Appends the new molecule to project, adds one residue per record, then
    delegates shift import to project.importBMRB().
    Return the Molecule instance, or None on error.
    """
    mol = Molecule(name=moleculeName)
    project.appendMolecule(mol)

    error = False
    record = None
    for record in AwkLike(bmrbFile, minNF=8, commentString='#'):

        resName = record.dollar[3]
        resNum = record.int(2)

        atomName = record.dollar[4]
        res = mol.addResidue(Chain.defaultChainId, resName, resNum, IUPAC)

        if not res:
            nTerror('Error initBMRB: invalid residue %s %s line %d (%s)\n',
                    resName, atomName, record.NR, record.dollar[0])
            error = True
        #end if
    #end for

    error = error or (project.importBMRB(bmrbFile) is None)
    if error:
        nTmessage('==> initBMRB: completed with error(s)')
    elif record is None:
        # Guard: with zero parsed records the success message would crash on
        # record.NR because record would still be None.
        nTmessage('==> initBMRB: no records parsed from %s', bmrbFile)
    else:
        nTmessage('==> initBMRB: successfully parsed %d lines from %s',
                  record.NR, record.FILENAME)
    #end if
    nTmessage("%s", mol.format())

    if error:
        return None
    return mol
Example #9
0
def analyzeCingLog(logFile):
    """
    Scan a CING log file and tally its lines by severity.

    Returns [timeTaken, crashed, nr_error, nr_warning, nr_message, nr_debug]
    Return None on error.

    The numbers of lines should add up to the total number of lines.
    For a specific other log file type look at the example in cing.Scripts.FC.utils
    """
    if not logFile:
        nTerror("logFile %s was not specified." % logFile)
        return None
    if not os.path.exists(logFile):
        nTerror("logFile %s was not found." % logFile)
        return None

    tally = [None, None, 0, 0, 0, 0]
    for record in AwkLike(logFile):
        content = record.dollar[0]
        if content.startswith(prefixError):
            tally[2] += 1
        elif content.startswith(prefixWarning):
            tally[3] += 1
        elif content.startswith(prefixDebug):
            tally[5] += 1
        else:
            tally[4] += 1
            if content.startswith('CING took       :'):
                # The wall-clock time is the last whitespace-separated field.
                tally[0] = float(record.dollar[record.NF - 1])
            elif content.startswith('Traceback (most recent call last)'):
                tally[1] = True
    return tally
Example #10
0
def analyzeFcLog(logFile):
    """
    Scan a FormatConverter log file and tally its lines by severity.

    Returns [timeTaken, crashed, nr_error, nr_warning, nr_message, nr_debug]
    Return None on error.

    The numbers of lines should add up to the total number of lines.
    For a specific other log file type look at the example in cing.Scripts.FC.utils
    """
    if not logFile:
        nTerror("logFile %s was not specified." % logFile)
        return None
    if not os.path.exists(logFile):
        nTerror("logFile %s was not found." % logFile)
        return None

    tally = [None, None, 0, 0, 0, 0]
    for record in AwkLike(logFile):
        # Classification is case-insensitive: match on the lowercased line.
        lowered = record.dollar[0].lower()
        if lowered.count('error'):
            tally[2] += 1
        elif lowered.count('warning'):
            tally[3] += 1
        elif lowered.count('debug'):
            tally[5] += 1
        else:
            tally[4] += 1
            # Lowercase literal on purpose: the line was lowercased above.
            if lowered.startswith('traceback (most recent call last)'):
                tally[1] = True
    return tally
Example #11
0
def parseOutput(config, project, parameters ):
    """
    Parse the output in the Jobs directory
    parameters is a NTxplor instance

    Return None on error or results on success.

    Reads one refine/anneal Xplor log per model, extracts energies and
    restraint-violation statistics, sorts by parameters.sortField, writes a
    summary text file plus a best-models listing, and updates parameters.
    """
    nTmessage("\n-- parseOutput --")

    xplor = Xplor(config, parameters, project=project, outPath=config.directories.refined)

    # Defaults target the 'refine' protocol; the block below switches every
    # name to the 'anneal' variants when USE_ANNEALED_STR is set.
    logFileNameFmt = 'refine_%d.log'
    resultFileName = 'parsedOutput.txt'
    bestModelsFileNameFmt = 'best%dModels.txt'
    bestModelsParameterName = 'bestModels'
    bestModels = parameters.best                    # Integer
    allPreviousModels = parameters.models           # String
    # NOTE(review): refine default reads parameters.bestAnneal here while the
    # anneal branch reads parameters.modelCountAnneal -- confirm intended.
    allPreviousModelCount = parameters.bestAnneal   # Integer

    if getDeepByKeysOrAttributes( parameters, USE_ANNEALED_STR):
        logFileNameFmt = 'anneal_%d.log'
        resultFileName = 'parsedAnnealOutput.txt'
        bestModelsFileNameFmt = 'best%dModelsAnneal.txt'
        bestModelsParameterName = 'models'
        bestModels = parameters.bestAnneal
        allPreviousModels = parameters.modelsAnneal
        allPreviousModelCount = parameters.modelCountAnneal

    nTdebug( 'logFileNameFmt:         %s' % logFileNameFmt)
    nTdebug( 'resultFileName:         %s' % resultFileName)
    nTdebug( 'bestModelsFileNameFmt:  %s' % bestModelsFileNameFmt)
    nTdebug( 'bestModelsParameterName:%s' % bestModelsParameterName)
    nTdebug( 'bestModels:             %s (int)'     % bestModels)
    nTdebug( 'allPreviousModels:      %s (string)"' % allPreviousModels)
    nTdebug( 'allPreviousModelCount:  %s (int)'     % allPreviousModelCount)

    results = NTlist()
    # One entry per parsed log; keys double as the column order of the report.
    keys = ['model', 'eTotal',
            'Enoe', 'NOErmsd', 'NOEnumber',
            'NOEbound1', 'NOEviol1',
            'NOEbound2', 'NOEviol2',
            'DIHEDrmsd', 'DIHEDnumber', 'DIHEDbound', 'DIHEDviol'
           ]

    # parse all output files
    for i in asci2list(allPreviousModels):

        file = xplor.checkPath(xplor.directories.jobs, logFileNameFmt % i)
        nTmessage('==> Parsing %s', file)

        data = NTdict()
        for key in keys:
            data[key] = None
        data.model = i

        foundEnergy = 0
        foundNOE1 = 0
        foundNOE2 = 0
        foundDIHED = 0
        awkf = AwkLike(file)
        for line in awkf:
#            nTdebug("line: %s" % line.dollar[0])
            if (not foundEnergy) and find(line.dollar[0], '--------------- cycle=     1 ----------------') >= 0:
                # NOTE(review): awkf.next() appears to advance the shared
                # record in place so line.dollar reflects the NEXT line after
                # each call -- confirm against AwkLike's implementation.
                # Values are taken from fixed column slices of the log layout.
                awkf.next()
#                nTdebug("Getting total energy from line: %s" % line.dollar[0])
                data['eTotal'] = float(line.dollar[0][11:22])

                awkf.next()
#                nTdebug("Getting NOE energy from line: %s" % line.dollar[0])
                if line.dollar[0].count("E(NOE"): # Dirty hack; use regexp next time.
#                    nTdebug("Bingo")
                    data['Enoe'] = float(line.dollar[0][68:75])
                else:
                    awkf.next()
#                    nTdebug("Getting NOE energy (try 2) from line: %s" % line.dollar[0])
                    data['Enoe'] = float(line.dollar[0][68:75])
                # end if
                foundEnergy = 1
            elif (not foundNOE1) and find(line.dollar[0], 'NOEPRI: RMS diff. =') >= 0:
                data['NOErmsd'] = float(line.dollar[5][:-1])
                data['NOEbound1'] = float(line.dollar[7][:-2])
                data['NOEviol1'] = int(line.dollar[8])
                foundNOE1 = 1
            elif (not foundNOE2) and find(line.dollar[0], 'NOEPRI: RMS diff. =') >= 0:
                # Same match string as above: the SECOND occurrence in the log
                # lands here because foundNOE1 is already set by then.
                data['NOEbound2'] = float(line.dollar[7][:-2])
                data['NOEviol2'] = int(line.dollar[8])
                data['NOEnumber'] = float(line.dollar[10])
                foundNOE2 = 1
            elif (not foundDIHED) and find(line.dollar[0], 'Number of dihedral angle restraints=') >= 0:
                data['DIHEDnumber'] = int(line.dollar[6])
                awkf.next()
                data['DIHEDbound'] = float(line.dollar[6][:-1])
                data['DIHEDviol'] = int(line.dollar[7])
                awkf.next()
                data['DIHEDrmsd'] = float(line.dollar[3])
                foundDIHED = 1
            #endif
        #end for
        # Logs without a total energy are treated as crashed/stopped runs and
        # excluded from the results.
        eTotal = getDeepByKeysOrAttributes( data, 'eTotal' )
        if eTotal == None:
            nTwarning("Failed to read energy for model: %s (probably crashed/stopped)." % i)
            continue
        results.append(data)
    #end for i

    # Since above compile might have ommissions check here how many may continue.
    resultCount = len(results)
    if allPreviousModelCount > resultCount:
        nTwarning("Will only consider %s results." %resultCount)
    elif allPreviousModelCount != resultCount:
        nTwarning("Got more results (%s) than expected input (%s). Will use all results." % (bestModels,resultCount))
    # end if

    # sort the results
    if parameters.sortField in keys:
#        nTdebug("Now sorting on field: %s" % parameters.sortField)
#        if 0: # The below failed at some point but is also not much in use. Removing.
#            myComp = CompareDict(parameters.sortField)
#            results.sort(myComp)
#        else:
        NTsort( results, parameters.sortField, inplace=True )
    else:
        # Unknown sort field: leave results unsorted and record that fact.
        parameters.sortField = None
    #endif

    # print results to file and screen
    resultFile = open(xplor.joinPath(resultFileName), 'w')
    msg = '\n=== Results: sorted on "%s" ===' % parameters.sortField
    nTmessage( msg )
    fprintf(resultFile, msg + '\n')
    fmt = '%-11s '
    # Header row: one fixed-width column per key.
    for k in keys:
        nTmessageNoEOL(fmt % str(k))
        fprintf(resultFile, fmt, str(k))
    #end for
    nTmessage('')
    fprintf(resultFile, '\n')
    for data in results:
        for k in keys:
            value = val2Str(getDeepByKeysOrAttributes(data, k), fmt, count=11)
            nTmessageNoEOL(value)
            fprintf(resultFile, fmt, value)
        #end for
        nTmessage('')
        fprintf(resultFile, '\n')
    #end for

    # best results to put in parameter file.
    resultCountBest = min( resultCount, bestModels )
    if resultCountBest > 0:
        msgLine = '\n=== Averages best %d models ===' % resultCountBest
        nTmessage(msgLine)
        fprintf(resultFile, msgLine + '\n' )
        # Average each key over the best models (results are already sorted).
        for key in keys:
            getKey = Key(key)
            values = map(getKey, results[:resultCountBest])
            av, sd, dummy_n = nTaverage(values)
            msgLine = '%-12s: %10.3f +/- %-10.3f' % ( key, av, sd)
            nTmessage(msgLine)
            fprintf(resultFile, msgLine + '\n')
        #end for
        nTmessage('\n')
        fprintf(resultFile, '\n\n')

        fname = xplor.joinPath(bestModelsFileNameFmt % resultCountBest)
        f = open(fname, 'w')
        # Build both the best-models file and a comma-separated model list in
        # parameters[bestModelsParameterName].
        parameters[bestModelsParameterName] = ''
        for i in range(resultCountBest):
            fprintf(f, '%s/%s\n', xplor.outPath, xplor.baseName % results[i].model)
            parameters[bestModelsParameterName] = '%s%s,' % (parameters[bestModelsParameterName], results[i].model)
        #end for
        f.close()
        parameters[bestModelsParameterName] = parameters[bestModelsParameterName][:-1] # Remove trailing comma.
        nTmessage('==> Best %d models (%s) listed in %s\n', resultCountBest, parameters[bestModelsParameterName], fname)
    else:
        # Nothing parsed: fall back to the previous model list unchanged.
        parameters[bestModelsParameterName] = allPreviousModels
    #end if
    resultFile.close()
    parameters.toFile(xplor.joinPath(PARAMETERS_FILE_NAME))
    return results
Example #12
0
    def parseResult(self):
        """
        Parse procheck .rin and .edt files and store result in procheck NTdict
        of each residue of molecule.

        Return True on error.

        """
        #        nTdebug("Starting pc parseResult")
        #        modelCount = self.molecule.modelCount
        modelCount = self.getMaxModelCount()

        #        nTdebug("==> Parsing procheck results")

        #        if modelCount > MAX_PROCHECK_NMR_MODELS:
        #            nTwarning("Limiting number of models analyzed from %d to %d" % (modelCount, MAX_PROCHECK_NMR_MODELS))
        #            modelCount = MAX_PROCHECK_NMR_MODELS

        # reset the procheck dictionary of each residue
        for res in self.molecule.allResidues():
            if res.has_key(PROCHECK_STR):
                del (res[PROCHECK_STR])
            res.procheck = ProcheckResidueResult(res)
        #end for

        for i in range(1, modelCount + 1):
            modelCountStr = "%03d" % i

            # special case in procheck_nmr
            if modelCount == 1:
                # special case for different versions Alan vs Jurgen...
                # JFD adds; this fails with my pc. Adding enabling code to handle both.
                modelCountStr = "000"
                path = os.path.join(
                    self.rootPath,
                    '%s_%s.rin' % (self.molecule.name, modelCountStr))
                if not os.path.exists(path):
                    #                    nTdebug('Procheck.parseResult: file "%s" not found assuming it was pc -server- version. ', path)
                    modelCountStr = "***"

            path = os.path.join(
                self.rootPath,
                '%s_%s.rin' % (self.molecule.name, modelCountStr))
            if not os.path.exists(path):
                nTerror('Procheck.parseResult: file "%s" not found', path)
                return True

            for line in AwkLike(path, minLength=64, commentString="#"):
                #                nTdebug("working on line: %s" % line.dollar[0])
                result = self._parseProcheckLine(line.dollar[0],
                                                 self.procheckDefs)
                if not result:
                    nTerror(
                        "Failed to parse procheck rin file the below line; giving up."
                    )
                    nTerror(line.dollar[0])
                    return True
                chain = result['chain']
                resNum = result['resNum']
                residue = self.molecule.decodeNameTuple(
                    (None, chain, resNum, None))
                if not residue:
                    nTerror(
                        'in Procheck.parseResult: residue not found (%s,%d); giving up.'
                        % (chain, resNum))
                    return True

#                nTdebug("working on residue %s" % residue)
                for field, value in result.iteritems():
                    if not self.procheckDefs[field][
                            3]:  # Checking store parameter.
                        continue
                    # Insert for key: "field" if missing an empty  NTlist.
                    residue.procheck.setdefault(field, NTlist())
                    #                    nTdebug( "For residue %s field %s found value %s" % ( residue, field, value ) )
                    residue.procheck[field].append(value)
#                    nTdebug("field %s has values: %s" % ( field, residue.procheck[field]))
#end for result
#end for line
#end for

        path = os.path.join(self.rootPath, '%s.edt' % self.molecule.name)
        if not os.path.exists(path):
            nTerror('Procheck.parseResult: file "%s" not found', path)
            return True
#        nTdebug( '> parsing edt >'+ path)

        for line in AwkLike(path, minLength=64, commentString="#"):
            result = self._parseProcheckLine(line.dollar[0],
                                             self.procheckEnsembleDefs)
            if not result:
                nTerror(
                    "Failed to parse procheck edt file the below line; giving up."
                )
                nTerror(line.dollar[0])
                return
            chain = result['chain']
            resNum = result['resNum']
            residue = self.molecule.decodeNameTuple(
                (None, chain, resNum, None))
            if not residue:
                nTerror(
                    'Procheck.parseResult: residue not found (%s,%d); giving up.'
                    % (chain, resNum))
                return
            #end if

            for field, value in result.iteritems():
                if not self.procheckEnsembleDefs[field][
                        3]:  # Checking store parameter.
                    continue
                # end if
                # Truncate for those rare instances ( < 10 for > 9,000 entries )
#pc gf phipsi can be extremely high:
# 1b64 SER      82        34.36      this might be an installation bug as it's value in PDBe is normal.
                if value and (field in gf_LIST_STR):
                    if value > PCgFactorMaxErrorValue:
                        nTwarning(
                            "A pc g-factor for %s of %s will be truncated to %s"
                            % (field, value, PCgFactorMaxErrorValue))
                        value = PCgFactorMaxErrorValue
                    # end if
                # end if
                residue.procheck[field] = value
            #end for
        #end for
        for field in gf_LIST_STR:
            resultList = NTlist()
            for residue in self.molecule.allResidues():
                value = residue.getDeepByKeys(PROCHECK_STR, field)
                #                nTdebug( "For residue %s field %s found value %s" % ( residue, field, value ) )
                resultList.append(value)
            averageTuple = resultList.average()
            #            nTdebug( "resultList %s" % resultList )
            # Nones will be ignored. Empty list will return )None,,)
            if averageTuple:  # get average only
                self.molecule[PROCHECK_STR][field] = averageTuple[0]
            else:
                nTwarning(
                    "No average over molecule obtained in procheck for %s" %
                    field)
        # summary
        path = os.path.join(self.rootPath, '%s.sum' % self.molecule.name)
        if not os.path.exists(path):
            nTerror('Procheck.parseResult: file "%s" not found', path)
            return True
#        nTdebug( '> parsing sum >'+ path)
        text = open(path, 'r').read()
        #        nTdebug( 'got: \n'+ text)
        if text:
            self.summary = ProcheckSummaryResult(text, self.molecule,
                                                 self.ranges)
        else:
            nTerror(
                'Procheck.parseResult: Failed to read and parse Procheck_nmr summary file (%s)',
                path)
            return True
        #end if

        self.postProcess()

        self.fileList = self.getPostscriptFileNames()

        return False
Example #13
0
    def getCingAnnoEntryInfo(self):
        """Check the completeness of and errors from annotation.

        Scans the two-character subdirectories of DATA_STR (relative to
        baseDir), inspects the last log_doAnno log of every entry and fills
        the NTlist attributes on self:
            entry_anno_list_tried / _crashed / _stopped / _done / _todo /
            _obsolete
        It also records the CING wall-clock time per entry in
        self.timeTakenDict.

        NOTE(review): the original docstring claimed "Returns True for
        error" but no code path returns a value; callers should not rely
        on the return value.
        """

        # Tolerances for ignorable annotation errors; exceeding any of them
        # marks the entry as crashed.
        max_link_errors = 20 # VpR247Cheshire had 16 terminii etc. problems that can be ignored.
        max_chain_mapping_errors = 1
        max_any_errors = 2 * max_link_errors + max_chain_mapping_errors

        nTmessage("Get the entries tried, todo, crashed, and stopped from file system.")

        self.entry_anno_list_obsolete = NTlist()
        self.entry_anno_list_tried = NTlist()
        self.entry_anno_list_crashed = NTlist()
        self.entry_anno_list_stopped = NTlist() # mutely exclusive from entry_list_crashed
        self.entry_anno_list_done = NTlist()
        self.entry_anno_list_todo = NTlist()

        cwdCache = os.getcwd()
        os.chdir(baseDir)
        try:  # BUGFIX: restore the original working directory even when the scan raises.
            subDirList = os.listdir(DATA_STR)
            subDirList.sort()
            for subDir in subDirList:
                # Entries live in two-character hash subdirectories.
                if len(subDir) != 2:
                    if subDir != DS_STORE_STR:
                        nTdebug('Skipping subdir with other than 2 chars: [' + subDir + ']')
                    continue
                entryList = os.listdir(os.path.join(DATA_STR, subDir))
                for entryDir in entryList:
                    entry_code = entryDir
                    if entry_code.startswith( "."):
                        # Hidden files (e.g. .DS_Store artifacts) are not entries.
                        continue
                    if entry_code.endswith( "Org") or entry_code.endswith( "Test"):
                        # Backup/test copies of entries are skipped.
                        continue
                    entrySubDir = os.path.join(DATA_STR, subDir, entry_code)

                    # Look for last log file
                    logList = glob(entrySubDir + '/log_doAnno*/*.log')
                    if not logList:
                        nTmessage("Failed to find any log file in directory: %s" % entrySubDir)
                        continue
                    # .cing directory and .log file present so it was tried to start but might not have finished
                    self.entry_anno_list_tried.append(entry_code)

                    logLastFile = logList[-1]
                    entryCrashed = False

                    linkErrorList = []
                    chainMappingErrorList = []
                    anyErrorList = []
                    for r in AwkLike(logLastFile):
                        line = r.dollar[0]
                        if line.startswith('CING took       :'):
                            # Second-to-last whitespace-separated field holds the run time.
                            timeTakenStr = r.dollar[r.NF - 1]
                            self.timeTakenDict[entry_code] = float(timeTakenStr)
                        if line.startswith('Traceback (most recent call last)'):
                            if entry_code in self.entry_anno_list_crashed:
                                nTwarning("%s was already found before; not adding again." % entry_code)
                            else:
                                self.entry_anno_list_crashed.append(entry_code)
                                entryCrashed = True
                        if line.count('ERROR:'):
                            nTerror("Matched line: %s" % line)

                        hasPseudoErrorListed = line.count(" .Q")
                        # ignore the errors for pseudos e.g. in CGR26ALyon Hopefully this is unique enough; tested well.
                        if line.count("Error: Not linking atom"):
                            if not hasPseudoErrorListed:
                                linkErrorList.append(line)
                        if line.count("Error: no chain mapping"):
                            chainMappingErrorList.append(line)
                        lineLower = line.lower()

                        hasApiErrorListed =  line.count('ApiError: ccp.nmr.NmrConstraint.DistanceConstraintItem.__init__:')
                        if lineLower.count("error"):
                            if not (hasPseudoErrorListed or hasApiErrorListed):
                                anyErrorList.append(line)

                        if line.count('Aborting'):
                            nTdebug("Matched line: %s" % line)
                            entryCrashed = True
                            if entry_code in self.entry_anno_list_crashed:
                                nTwarning("%s was already found before; not adding again." % entry_code)
                            else:
                                self.entry_anno_list_crashed.append(entry_code)
                    if entryCrashed:
                        continue # don't mark it as stopped anymore.

                    # Entries exceeding the (non-ignorable) error tolerances are treated as crashed too.
                    linkErrorListCount = len(linkErrorList)
                    if linkErrorListCount > max_link_errors:
                        nTerror("%-25s has more than %s link errors;          %s" % (entry_code,max_link_errors,linkErrorListCount))
                        entryCrashed = True
                    chainMappingListCount = len(chainMappingErrorList)
                    if chainMappingListCount > max_chain_mapping_errors:
                        nTerror("%-25s has more than %s chain mapping errors; %s" % (entry_code,max_chain_mapping_errors,chainMappingListCount))
                        entryCrashed = True
                    anyErrorListCount = len(anyErrorList)
                    if anyErrorListCount > max_any_errors:
                        nTerror("%-25s has more than %s any errors;           %s" % (entry_code,max_any_errors,anyErrorListCount))
                        entryCrashed = True

                    if entryCrashed:
                        continue # don't mark it as stopped anymore.

                    if entry_code not in self.timeTakenDict:  # idiom: 'in' instead of deprecated has_key()
                        # was stopped by time out or by user or by system (any other type of stop but stack trace)
                        nTmessage("%s Since CING end message was not found assumed to have stopped" % entry_code)
                        self.entry_anno_list_stopped.append(entry_code)
                        continue

                    # Look for end statement from CING which shows it wasn't killed before it finished.
                    ccpnFileEntry = os.path.join(entrySubDir, "%s.tgz"%entry_code)
                    if not os.path.exists(ccpnFileEntry):
                        nTmessage("%s Since ccpn file %s was not found assumed to have stopped" % (entry_code, ccpnFileEntry))
                        self.entry_anno_list_stopped.append(entry_code)
                        continue

                    self.entry_anno_list_done.append(entry_code)
                # end for entryDir
            # end for subDir
            timeTakenList = NTlist() # local variable.
            timeTakenList.addList(self.timeTakenDict.values())
            nTmessage("Time taken by CING by statistics\n%s" % timeTakenList.statsFloat())

            if not self.entry_anno_list_tried:
                nTerror("Failed to find entries that CING tried.")

            # todo = all - done
            self.entry_anno_list_todo.addList(self.entry_anno_list_all)
            self.entry_anno_list_todo = self.entry_anno_list_todo.difference(self.entry_anno_list_done)

            nTmessage("Found %s entries overall for annotation." % len(self.entry_anno_list_all))
            nTmessage("Found %s entries that CING tried (T)." % len(self.entry_anno_list_tried))
            nTmessage("Found %s entries that CING crashed/failed (C)." % len(self.entry_anno_list_crashed))
            nTmessage("Found %s entries that CING stopped (S)." % len(self.entry_anno_list_stopped))
            if not self.entry_anno_list_done:
                nTerror("Failed to find entries that CING did.")
            nTmessage("Found %s entries that CING did (B=A-C-S)." % len(self.entry_anno_list_done))
            nTmessage("Found %s entries todo (A-B)." % len(self.entry_anno_list_todo))
            nTmessage("Found %s entries obsolete (not removed yet)." % len(self.entry_anno_list_obsolete))
            nTmessage("Found entries todo:\n%s" % self.entry_anno_list_todo)
        finally:
            os.chdir(cwdCache)
Example #14
0
    def getCingEntriesTriedAndDone(self):
        """Scan the CASP-NMR-CING data tree on disk for processed entries.

        Returns a 3-tuple of plain lists of entry codes:
            (entry_list_tried, entry_list_done, entry_list_crashed)
        Also fills self.timeTakenDict with the CING run time per entry.

        NOTE(review): the original one-line docstring said "Returns list or
        None for error", but no code path currently returns None.
        """
        nTdebug("From disk get the entries done in CASP-NMR-CING")

        entry_list_tried = []
        entry_list_done = []
        entry_list_crashed = []

        nTdebug("Now in: " + os.getcwd())
        subDirList = os.listdir(DATA_STR)
        for subDir in subDirList:
            # Entries live in two-character hash subdirectories.
            if len(subDir) != 2:
                if subDir != DS_STORE_STR:
                    nTdebug('Skipping subdir with other than 2 chars: [' + subDir + ']')
                continue
            entryList = os.listdir(os.path.join(DATA_STR,subDir))
            for entryDir in entryList:
                entry_code = entryDir
                if entry_code == DS_STORE_STR:
                    continue

                entrySubDir = os.path.join(DATA_STR, subDir, entry_code)

                # Without a .cing directory the entry was never started.
                cingDirEntry = os.path.join(entrySubDir, entry_code + ".cing")
                if not os.path.exists(cingDirEntry):
                    continue
                logFileValidate = 'log_validateEntryForCasp'
                # Inspect the logs of both pipeline stages for this entry.
                for logFile in ( logFileValidate, 'log_storeCING2db' ):
                    # Look for last log file
                    logList = glob(entrySubDir + '/%s/*.log' % logFile)
                    if not logList:
                        nTmessage("Failed to find any log file in subdirectory of: %s" % entrySubDir)
                        continue
                    # .cing directory and .log file present so it was tried to start but might not have finished
    #                self.entry_anno_list_tried.append(entry_code)
                    if logFile == logFileValidate:
                        # Only the validate stage counts as "tried".
                        entry_list_tried.append(entry_code)
                    logLastFile = logList[-1]

                    entryCrashed = False
                    entryWithErrorMessage = False
                    for r in AwkLike(logLastFile):
                        line = r.dollar[0]
                        if line.startswith('CING took       :'):
    #                        nTdebug("Matched line: %s" % line)
                            # Second-to-last whitespace-separated field holds the run time.
                            timeTakenStr = r.dollar[r.NF - 1]
                            self.timeTakenDict[entry_code] = float(timeTakenStr)
    #                        nTdebug("Found time: %s" % self.timeTakenDict[entry_code])
                        if line.startswith('Traceback (most recent call last)'):
    #                        nTdebug("Matched line: %s" % line)
                            if entry_code in entry_list_crashed:
                                nTwarning("%s was already found before; not adding again." % entry_code)
                            else:
                                entry_list_crashed.append(entry_code)
                                entryCrashed = True
                        if line.count('ERROR:'):
                            nTerror("Matched line: %s" % line)
                            entryWithErrorMessage = True
                        if line.count('Aborting'):
                            nTdebug("Matched line: %s" % line)
                            entryCrashed = True
                            if entry_code in entry_list_crashed:
                                nTwarning("%s was already found before; not adding again." % entry_code)
                            else:
                                entry_list_crashed.append(entry_code)
                    if entryWithErrorMessage:
                        nTerror("Above for entry: %s" % entry_code)
                    if entryCrashed:
                        continue # don't mark it as stopped anymore.

                    # An index.html inside the .cing dir marks a completed run.
                    indexFileEntry = os.path.join(cingDirEntry, "index.html")
                    if os.path.exists(indexFileEntry):
                        entry_list_done.append(entry_code)
                    # end if index exists
                # end for log
            # end for entry
        # end for subdir
        return (entry_list_tried, entry_list_done, entry_list_crashed)
Example #15
0
    def __init__(self, seqFile, protFile, convention):
        """Parse a Xeasy/Cyana sequence (.seq) and proton (.prot) file pair.

        seqFile    -- path to the sequence file: one residue name per line,
                      optionally followed by an explicit residue number.
        protFile   -- path to the shift file with 5 columns:
                      index, shift, error, atomName, resNum.
        convention -- naming convention used to validate residue/atom names.

        Fills self.seq (resNum -> residue name) and self.prot
        (atom index -> NTdict record); self.error is set to 1 when any
        parse/validation problem is encountered in either file.
        """
        NTdict.__init__(self)

        # parse the seqFile
        self.seq = {}
        resNum = 1
        self.resCount = 0
        # BUGFIX: initialize the error flag once, up front. It used to be
        # reset to 0 between the seq and the prot parsing loops, silently
        # discarding any error detected while parsing the sequence file.
        self.error = 0
        for f in AwkLike(seqFile, commentString='#'):
            if (not f.isEmpty() and not f.isComment('#')):
                if (f.dollar[1] in
                        CYANA_NON_RESIDUES  # skip the bloody CYANA non-residue stuff
                    ):
                    pass

                elif (not NTdb.isValidResidueName(f.dollar[1], convention)):
                    nTerror(
                        'Xeasy: residue "%s" invalid for convention "%s" in "%s:%d"',
                        f.dollar[1], convention, seqFile, f.NR)
                    self.error = 1
                else:
                    if (f.NF > 1):
                        # An explicit residue number is given in column 2.
                        resNum = f.int(2)
                        if resNum is None:
                            self.error = 1
                        #end if
                    #endif
                    self.seq[resNum] = f.dollar[
                        1]  # store original 'convention' name
                    resNum += 1
                    self.resCount += 1
                #end if
            #end if
        #end for
        self.seqFile = seqFile
        self.convention = convention

        # parse the prot file
        self.prot = {}
        self.protCount = 0
        for f in AwkLike(protFile, commentString='#'):
            if f.NF == 5:
                # Xeasy/Cyana atom index
                index = f.int(1)
                atomName = f.dollar[4]
                resNum = f.int(5)
                if resNum not in self.seq:
                    nTwarning(
                        'Xeasy: undefined residue number %d in "%s:%d" (%s)' %
                        (resNum, protFile, f.NR, f.dollar[0]))
                    self.error = 1
                else:
                    resName = self.seq[resNum]
                    if not NTdb.isValidAtomName(resName, atomName, convention):
                        nTwarning(
                            'Xeasy parsing "%s:%d": invalid atom "%s" for residue %s%d'
                            % (protFile, f.NR, atomName, resName, resNum))
                        self.error = 1
                    else:
                        p = NTdict(index=index,
                                   shift=f.float(2),
                                   error=f.float(3),
                                   atomName=atomName,
                                   resNum=resNum,
                                   resName=resName,
                                   atom=None)
                        self.prot[index] = p
                        self.protCount += 1
                    #end if
                #end if
            #end if
        #end for

        self.protFile = protFile
        # NOTE(review): nTmessage is called printf-style here (format string
        # plus separate args) while other callers in this file pre-format
        # with '%'. Confirm nTmessage supports vararg formatting.
        nTmessage('Xeasy.__init__: parsed %d residues, %d atoms from %s, %s',
                  self.resCount, self.protCount, self.seqFile, self.protFile)
Example #16
0
    def getCingEntryInfo(self):
        """Classify every PDB-CING entry on disk.

        Will remove entry directories if they do not occur in NRG up to a
        maximum number as not to whip out every one in a single blow by
        accident.

        Fills the NTlist attributes entry_list_tried / _crashed / _stopped
        / _done / _todo / _obsolete and records per-entry run times in
        self.timeTakenDict.

        NOTE(review): the original docstring claimed "Returns True for
        error" but no code path returns a value.
        """

        nTmessage(
            "Get the entries tried, todo, crashed, and stopped in PDB-CING from file system."
        )

        self.entry_list_obsolete = NTlist()
        self.entry_list_tried = NTlist()
        self.entry_list_crashed = NTlist()
        self.entry_list_stopped = NTlist(
        )  # mutely exclusive from entry_list_crashed
        self.entry_list_done = NTlist()
        self.entry_list_todo = NTlist()

        subDirList = os.listdir(DATA_STR)
        for subDir in subDirList:
            # Entries live in two-character hash subdirectories.
            if len(subDir) != 2:
                if subDir != DS_STORE_STR:
                    nTdebug('Skipping subdir with other than 2 chars: [' +
                            subDir + ']')
                continue
            entryList = os.listdir(os.path.join(DATA_STR, subDir))
            for entryDir in entryList:
                entry_code = entryDir
                if not is_pdb_code(entry_code):
                    if entry_code != DS_STORE_STR:
                        nTerror("String doesn't look like a pdb code: " +
                                entry_code)
                    continue

                entrySubDir = os.path.join(DATA_STR, subDir, entry_code)
                # Entries no longer present in the PDB are obsoleted,
                # bounded by entry_to_delete_count_max per run.
                if entry_code not in self.entry_list_pdb:
                    nTwarning(
                        "Found entry %s in PDB-CING-CING but not in PDB. Will be obsoleted in PDB-CING too"
                        % entry_code)
                    if len(self.entry_list_obsolete
                           ) < self.entry_to_delete_count_max:
                        rmdir(entrySubDir)
                        self.entry_list_obsolete.append(entry_code)
                    else:
                        nTerror(
                            "Entry %s in PDB-CING not obsoleted since there were already removed: %s"
                            % (entry_code, self.entry_to_delete_count_max))
                # end if

                cingDirEntry = os.path.join(entrySubDir, entry_code + ".cing")
                if not os.path.exists(cingDirEntry):
                    nTmessage("Failed to find directory: %s" % cingDirEntry)
                    continue

                # Look for last log file
                logList = glob(entrySubDir + '/log_validateEntry/*.log')
                if not logList:
                    nTmessage("Failed to find any log file in directory: %s" %
                              entrySubDir)
                    continue
                # .cing directory and .log file present so it was tried to start but might not have finished
                self.entry_list_tried.append(entry_code)

                logLastFile = logList[-1]
                entryCrashed = False
                for r in AwkLike(logLastFile):
                    line = r.dollar[0]
                    if entryCrashed:
                        # Echo the traceback lines following the crash marker.
                        nTdebug(line)
                    if line.startswith('CING took       :'):
                        # Second-to-last whitespace-separated field holds the run time.
                        timeTakenStr = r.dollar[r.NF - 1]
                        self.timeTakenDict[entry_code] = float(timeTakenStr)
                    if line.startswith('Traceback (most recent call last)'):
                        nTdebug("%s Matched line: %s" % (entry_code, line))
                        if entry_code in self.entry_list_crashed:
                            nTwarning(
                                "%s was already found before; not adding again."
                                % entry_code)
                        else:
                            self.entry_list_crashed.append(entry_code)
                            entryCrashed = True
                # end for AwkLike
                if entryCrashed:
                    continue  # don't mark it as stopped anymore.

                if entry_code not in self.timeTakenDict:  # idiom: 'in' instead of deprecated has_key()
                    # was stopped by time out or by user or by system (any other type of stop but stack trace)
                    nTmessage(
                        "%s Since CING end message was not found assumed to have stopped"
                        % entry_code)
                    self.entry_list_stopped.append(entry_code)
                    continue

                # Look for end statement from CING which shows it wasn't killed before it finished.
                indexFileEntry = os.path.join(cingDirEntry, "index.html")
                if not os.path.exists(indexFileEntry):
                    nTmessage(
                        "%s Since index file %s was not found assumed to have stopped"
                        % (entry_code, indexFileEntry))
                    self.entry_list_stopped.append(entry_code)
                    continue

                projectHtmlFile = os.path.join(cingDirEntry, entry_code,
                                               "HTML/index.html")
                if not os.path.exists(projectHtmlFile):
                    nTmessage(
                        "%s Since project html file %s was not found assumed to have stopped"
                        % (entry_code, projectHtmlFile))
                    self.entry_list_stopped.append(entry_code)
                    continue

                if False:  # Default is True
                    molGifFile = os.path.join(cingDirEntry, entry_code,
                                              "HTML/mol.gif")
                    if not os.path.exists(molGifFile):
                        # BUGFIX (dead branch): message used to interpolate
                        # projectHtmlFile instead of molGifFile.
                        nTmessage(
                            "%s Since mol.gif file %s was not found assumed to have stopped"
                            % (entry_code, molGifFile))
                        self.entry_list_stopped.append(entry_code)
                        continue

                self.entry_list_done.append(entry_code)
            # end for entryDir
        # end for subDir
        timeTakenList = NTlist()  # local variable.
        timeTakenList.addList(self.timeTakenDict.values())
        nTmessage("Time taken by CING by statistics\n%s" %
                  timeTakenList.statsFloat())

        if not self.entry_list_tried:
            nTerror("Failed to find entries that CING tried.")

        # todo = pdb - done
        self.entry_list_todo.addList(self.entry_list_pdb)
        self.entry_list_todo = self.entry_list_todo.difference(
            self.entry_list_done)

        nTmessage("Found %s entries that CING tried (T)." %
                  len(self.entry_list_tried))
        nTmessage("Found %s entries that CING crashed (C)." %
                  len(self.entry_list_crashed))
        nTmessage("Found %s entries that CING stopped (S)." %
                  len(self.entry_list_stopped))
        if not self.entry_list_done:
            nTerror("Failed to find entries that CING did.")
        nTmessage("Found %s entries that CING did (B=A-C-S)." %
                  len(self.entry_list_done))
        nTmessage("Found %s entries todo (A-B)." % len(self.entry_list_todo))
        nTmessage("Found %s entries in PDB-CING made obsolete." %
                  len(self.entry_list_obsolete))
Example #17
0
def parseOutput(config, project, parameters):
    """
    Parse the output in the Jobs directory
    parameters is a NTxplor instance

    Return None on error or results on success.

    Scans each model's refine/anneal log for total and NOE energies plus
    NOE/dihedral restraint-violation statistics, sorts the models on
    parameters.sortField, writes a report file and stores the best-model
    list back into parameters.
    """
    nTmessage("\n-- parseOutput --")

    xplor = Xplor(config,
                  parameters,
                  project=project,
                  outPath=config.directories.refined)

    # Defaults target the 'refine' stage; switched below for 'anneal'.
    logFileNameFmt = 'refine_%d.log'
    resultFileName = 'parsedOutput.txt'
    bestModelsFileNameFmt = 'best%dModels.txt'
    bestModelsParameterName = 'bestModels'
    bestModels = parameters.best  # Integer
    allPreviousModels = parameters.models  # String
    allPreviousModelCount = parameters.bestAnneal  # Integer

    if getDeepByKeysOrAttributes(parameters, USE_ANNEALED_STR):
        logFileNameFmt = 'anneal_%d.log'
        resultFileName = 'parsedAnnealOutput.txt'
        bestModelsFileNameFmt = 'best%dModelsAnneal.txt'
        bestModelsParameterName = 'models'
        bestModels = parameters.bestAnneal
        allPreviousModels = parameters.modelsAnneal
        allPreviousModelCount = parameters.modelCountAnneal

    nTdebug('logFileNameFmt:         %s' % logFileNameFmt)
    nTdebug('resultFileName:         %s' % resultFileName)
    nTdebug('bestModelsFileNameFmt:  %s' % bestModelsFileNameFmt)
    nTdebug('bestModelsParameterName:%s' % bestModelsParameterName)
    nTdebug('bestModels:             %s (int)' % bestModels)
    nTdebug('allPreviousModels:      %s (string)"' % allPreviousModels)
    nTdebug('allPreviousModelCount:  %s (int)' % allPreviousModelCount)

    results = NTlist()
    keys = [
        'model', 'eTotal', 'Enoe', 'NOErmsd', 'NOEnumber', 'NOEbound1',
        'NOEviol1', 'NOEbound2', 'NOEviol2', 'DIHEDrmsd', 'DIHEDnumber',
        'DIHEDbound', 'DIHEDviol'
    ]

    # parse all output files
    for i in asci2list(allPreviousModels):

        # Renamed from 'file' to avoid shadowing the builtin.
        logFilePath = xplor.checkPath(xplor.directories.jobs,
                                      logFileNameFmt % i)
        nTmessage('==> Parsing %s', logFilePath)

        data = NTdict()
        for key in keys:
            data[key] = None
        data.model = i

        foundEnergy = 0
        foundNOE1 = 0
        foundNOE2 = 0
        foundDIHED = 0
        awkf = AwkLike(logFilePath)
        for line in awkf:
            if (not foundEnergy) and find(
                    line.dollar[0],
                    '--------------- cycle=     1 ----------------') >= 0:
                # The energy values appear on the lines after the marker.
                awkf.next()
                data['eTotal'] = float(line.dollar[0][11:22])

                awkf.next()
                if line.dollar[0].count(
                        "E(NOE"):  # Dirty hack; use regexp next time.
                    data['Enoe'] = float(line.dollar[0][68:75])
                else:
                    awkf.next()
                    data['Enoe'] = float(line.dollar[0][68:75])
                # end if
                foundEnergy = 1
            # Both NOE branches match the same marker: the first occurrence
            # fills NOE1, the second (foundNOE1 already set) fills NOE2.
            elif (not foundNOE1) and find(line.dollar[0],
                                          'NOEPRI: RMS diff. =') >= 0:
                data['NOErmsd'] = float(line.dollar[5][:-1])
                data['NOEbound1'] = float(line.dollar[7][:-2])
                data['NOEviol1'] = int(line.dollar[8])
                foundNOE1 = 1
            elif (not foundNOE2) and find(line.dollar[0],
                                          'NOEPRI: RMS diff. =') >= 0:
                data['NOEbound2'] = float(line.dollar[7][:-2])
                data['NOEviol2'] = int(line.dollar[8])
                data['NOEnumber'] = float(line.dollar[10])
                foundNOE2 = 1
            elif (not foundDIHED) and find(
                    line.dollar[0],
                    'Number of dihedral angle restraints=') >= 0:
                data['DIHEDnumber'] = int(line.dollar[6])
                awkf.next()
                data['DIHEDbound'] = float(line.dollar[6][:-1])
                data['DIHEDviol'] = int(line.dollar[7])
                awkf.next()
                data['DIHEDrmsd'] = float(line.dollar[3])
                foundDIHED = 1
            #endif
        #end for
        eTotal = getDeepByKeysOrAttributes(data, 'eTotal')
        if eTotal is None:
            nTwarning(
                "Failed to read energy for model: %s (probably crashed/stopped)."
                % i)
            continue
        results.append(data)
    #end for i

    # Since above compile might have ommissions check here how many may continue.
    resultCount = len(results)
    if allPreviousModelCount > resultCount:
        nTwarning("Will only consider %s results." % resultCount)
    elif allPreviousModelCount != resultCount:
        # BUGFIX: the message used to interpolate (bestModels, resultCount);
        # it reports the actual result count vs. the expected input count.
        nTwarning(
            "Got more results (%s) than expected input (%s). Will use all results."
            % (resultCount, allPreviousModelCount))
    # end if

    # sort the results
    if parameters.sortField in keys:
        NTsort(results, parameters.sortField, inplace=True)
    else:
        parameters.sortField = None
    #endif

    # print results to file and screen
    resultFile = open(xplor.joinPath(resultFileName), 'w')
    msg = '\n=== Results: sorted on "%s" ===' % parameters.sortField
    nTmessage(msg)
    fprintf(resultFile, msg + '\n')
    fmt = '%-11s '
    for k in keys:
        nTmessageNoEOL(fmt % str(k))
        fprintf(resultFile, fmt, str(k))
    #end for
    nTmessage('')
    fprintf(resultFile, '\n')
    for data in results:
        for k in keys:
            value = val2Str(getDeepByKeysOrAttributes(data, k), fmt, count=11)
            nTmessageNoEOL(value)
            fprintf(resultFile, fmt, value)
        #end for
        nTmessage('')
        fprintf(resultFile, '\n')
    #end for

    # best results to put in parameter file.
    resultCountBest = min(resultCount, bestModels)
    if resultCountBest > 0:
        msgLine = '\n=== Averages best %d models ===' % resultCountBest
        nTmessage(msgLine)
        fprintf(resultFile, msgLine + '\n')
        for key in keys:
            getKey = Key(key)
            values = map(getKey, results[:resultCountBest])
            av, sd, dummy_n = nTaverage(values)
            msgLine = '%-12s: %10.3f +/- %-10.3f' % (key, av, sd)
            nTmessage(msgLine)
            fprintf(resultFile, msgLine + '\n')
        #end for
        nTmessage('\n')
        fprintf(resultFile, '\n\n')

        fname = xplor.joinPath(bestModelsFileNameFmt % resultCountBest)
        f = open(fname, 'w')
        parameters[bestModelsParameterName] = ''
        for i in range(resultCountBest):
            fprintf(f, '%s/%s\n', xplor.outPath,
                    xplor.baseName % results[i].model)
            parameters[bestModelsParameterName] = '%s%s,' % (
                parameters[bestModelsParameterName], results[i].model)
        #end for
        f.close()
        parameters[bestModelsParameterName] = parameters[
            bestModelsParameterName][:-1]  # Remove trailing comma.
        nTmessage('==> Best %d models (%s) listed in %s\n', resultCountBest,
                  parameters[bestModelsParameterName], fname)
    else:
        parameters[bestModelsParameterName] = allPreviousModels
    #end if
    resultFile.close()
    parameters.toFile(xplor.joinPath(PARAMETERS_FILE_NAME))
    return results
Example #18
0
    def parseResult(self):
        """
        Parse .dssp files and store result in dssp NTdict
        of each residue of mol.

        One file per model ('model_%03d.dssp') is expected under
        self.rootPath; data lines start after the header line containing
        "RESIDUE AA STRUCTURE BP1 BP2".

        Return True on error (missing file, unparsable line, or unknown
        residue); implicitly returns None (falsy) on success.
        """
        modelCount = self.molecule.modelCount
        #        nTdebug("Parse dssp files and store result in each residue for " + repr(modelCount) + " model(s)")

        for model in range(modelCount):
            fullnameOut = 'model_%03d.dssp' % model
            path = os.path.join(self.rootPath, fullnameOut)
            if not os.path.exists(path):
                nTerror('Dssp.parseResult: file "%s" not found', path)
                return True

#            nTmessage("Parsing " + path)
            # Skip everything up to and including the column-header line.
            isDataStarted = False
            for line in AwkLike(path):
                if line.dollar[0].find("RESIDUE AA STRUCTURE BP1 BP2") >= 0:
                    isDataStarted = True
                    continue
                if not isDataStarted:
                    continue
#                nTdebug("working on line: %s" % line.dollar[0])
                # Columns 7-10 hold the residue number; blank there means a
                # chain-break/filler line that carries no residue data.
                if not len(line.dollar[0][6:10].strip()):
                    #                    nTdebug('Skipping line without residue number')
                    continue
                result = self._parseLine(line.dollar[0], self.dsspDefs)
                if not result:
                    nTerror(
                        "Failed to parse dssp file the below line; giving up.")
                    nTerror(line.dollar[0])
                    return True
                chain = result['chain']
                resNum = result['resNum']
                # Map the (chain, residue number) pair back onto the molecule.
                residue = self.molecule.decodeNameTuple(
                    (None, chain, resNum, None))
                if not residue:
                    nTerror(
                        'in Dssp.parseResult: residue not found (%s,%d); giving up.'
                        % (chain, resNum))
                    return True
                # For first model reset the dssp dictionary in the residue
                # so stale results from a previous parse do not accumulate.
                if model == 0 and residue.has_key('dssp'):
                    del (residue['dssp'])
                residue.setdefault('dssp', NTdict())

                #                nTdebug("working on residue %s" % residue)
                # Append this model's value per field; each field ends up as
                # an NTlist with one entry per model.
                for field, value in result.iteritems():
                    if not self.dsspDefs[field][3]:  # Checking store parameter.
                        continue
                    # Insert for key: "field" if missing an empty  NTlist.
                    residue.dssp.setdefault(field, NTlist())
                    residue.dssp[field].append(value)
#                    nTdebug("field %s has values: %s" % ( field, residue.dssp[field]))
#end for
#end for
#end for
        # Derive a per-residue consensus secondary structure over all models.
        for residue in self.molecule.allResidues():
            if residue.has_key(DSSP_STR):
                #                residue[DSSP_STR].consensus = residue[DSSP_STR].secStruct.setConsensus(CONSENSUS_SEC_STRUCT_FRACTION)
                residue[DSSP_STR].consensus = residue[
                    DSSP_STR].secStruct.setConsensus(useLargest=True)
                residue[DSSP_STR].keysformat()
        #end for
        self.project.status.dssp.parsed = True
Example #19
0
                 )
(options, args) = parser.parse_args()

#print options
#print args

# -h style self-documentation: dump help plus the module docstring and quit.
if options.doc:
    parser.print_help(file=sys.stdout)
    print __doc__
    sys.exit(0)

# The output option is mandatory; check_required exits/raises when absent.
parser.check_required('-o')

# Input models: either listed one-per-line in a file (--modelList, first
# whitespace-delimited field of each line), or given as positional args.
if options.modelList != None:
    files = []
    for line in AwkLike( options.modelList ):
        files.append( line.dollar[1] )
    #end for
else:
    files = args
#endif

# convention
convention = 'PDB'
if options.convention != None:
    convention = options.convention
#end if

verbose = 1

#=======================================================================
Example #20
0
def importUpl( project, uplFile, convention, lower = 0.0 ):
    """
    Read a Cyana upl (upper-limit distance restraint) file.
    Each data line holds two (residue number, atom name) pairs plus an
    upper bound; optional trailing Candid columns (peak id, SUP, QF) are
    stored on the restraint when present.
    Return a DistanceRestraintList or None on error.
    """
    maxErrorCount = 50  # cap on individually reported decode failures
    errorCount = 0

    # A molecule must be loaded before restraints can be mapped onto atoms.
    if not project or not project.molecule:
        nTerror("importUpl: initialize molecule first")
        return None
    molecule = project.molecule

    if not os.path.exists( uplFile ):
        nTerror('importUpl: file "%s" not found', uplFile)
        return None

    _dir, name, _ext = nTpath( uplFile )
    result = project.distances.new( name=name, status='keep')
    atomDict = molecule.getAtomDict(convention)

    # Lines with fewer than 7 fields or starting with '#' are skipped by AwkLike.
    for line in AwkLike( uplFile, commentString="#", minNF=7 ):
        # The two atoms live in fields (1,3) and (4,6): number then name.
        pair = []
        for numIdx, nameIdx in ((1, 3), (4, 6)):
            key = (line.int(numIdx), line.dollar[nameIdx])
            atom = atomDict.get(key)
            if not atom:
                if errorCount <= maxErrorCount:
                    nTerror('Failed to decode for atom %s; line: %s', key, line.dollar[0] )
                if errorCount == maxErrorCount + 1:
                    nTerror("And so on")
                errorCount += 1
                continue
            pair.append(atom)
        #end for
        if len(pair) != 2:
            continue
        atm1, atm2 = pair

        upper = line.float(7)
        # A zero bound marks an ambiguous restraint: its atom pair extends
        # the most recently added restraint instead of creating a new one.
        if upper == 0:
            result().appendPair( (atm1, atm2) )
            continue
        if not upper:
            nTerror("Skipping line without valid upper bound on line: [" + line.dollar[0] + ']')
            continue

        restraint = DistanceRestraint( atomPairs=[(atm1, atm2)], lower=lower, upper=upper )
        result.append( restraint )
        # Also store the Candid info if present.
        if line.NF >= 9:
            restraint.peak = line.int( 9 )
        if line.NF >= 11:
            restraint.SUP = line.float( 11 )
        if line.NF >= 13:
            restraint.QF = line.float( 13 )
    #end for

    if errorCount:
        nTerror("Found number of errors importing upl file: %s" % errorCount)
    nTmessage('==> importUpl: new %s from "%s"', result, uplFile )
    return result
Example #21
0
def importFromBMRB(project, bmrbFile):
    """
    Import chemical shifts from edited BMRB file.
    No reassigned Pseudo atoms yet.

    Column layout read per data line (9+ fields, '#' comments skipped):
    field 2: residue number, field 4: residue name, field 5: atom name,
    field 7: shift, field 8: shift error, field 9: ambiguity code.
    (Assumed from the indices used below -- TODO confirm against the
    actual edited BMRB layout.)

    After parsing, any assigned atom with a pseudo atom whose real atoms
    are not all assigned has its assignment moved onto the pseudo atom.

    Return molecule instance or None on error (no molecule defined, or at
    least one atom failed to decode).
    """

    if not project.molecule:
        nTerror("Error importBMRB: no molecule defined")
        return None
    #end if

    mol = project.molecule
    # Start a fresh resonance set tagged with its origin file.
    mol.newResonances(source=bmrbFile)

    error = False
    for f in AwkLike(bmrbFile, minNF=9, commentString='#'):

        resName = f.dollar[4]
        resNum = f.int(2)

        atomName = f.dollar[5]
        shift = f.float(7)
        serror = f.float(8)
        _ambig = f.int(9)

        atm = mol.decodeNameTuple(
            (IUPAC, Chain.defaultChainId, resNum, atomName))

        if not atm:
            nTerror('Error initBMRB: invalid atom %s %s line %d (%s)', resName,
                    atomName, f.NR, f.dollar[0])
            error = True
        else:
            atm.resonances().value = shift
            atm.resonances().error = serror
            # Ambiguity code 1 on a prochiral atom implies a stereo assignment.
            if _ambig == 1 and atm.isProChiral():
                atm.stereoAssigned = True
        #end if
    #end for

    # Now fix the assignments: when a pseudo atom's real atoms are not all
    # assigned, move the assignment from the real atom to the pseudo atom.
    for atm in mol.allAtoms():
        # Check if all realAtoms are assigned in case there is a pseudo atom
        if atm.isAssigned(
                resonanceListIdx=RESONANCE_LIST_IDX_ANY
        ) and not atm.isStereoAssigned() and atm.hasPseudoAtom():
            fix = False
            pseudo = atm.pseudoAtom()
            for a in pseudo.realAtoms():
                if not a.isAssigned(resonanceListIdx=RESONANCE_LIST_IDX_ANY):
                    fix = True
                    break
                #end if
            #end for
            if fix:
                pseudo.resonances().value = atm.resonances().value
                pseudo.resonances().error = atm.resonances().error
                atm.resonances().value = NaN
                # BUG FIX: the original set .value to NaN twice; the error
                # field must be cleared too, mirroring the copy just above.
                atm.resonances().error = NaN
                nTmessage('Deassigned %s, assigned %s', atm, pseudo)
            #end if
        #end if
    #end for

    if error:
        nTerror('==> importFromBMRB: completed with error(s)')
        return None
    #end if
    # NOTE(review): 'f' is the loop variable from the parse loop above; for an
    # empty input file it is unbound (hence the original pylint disable).
    nr = getDeepByKeysOrAttributes(f, 'NR')  # pylint: disable=W0631
    nTmessage('==> importFromBMRB: successfully parsed %d lines from %s',
              nr, bmrbFile)
    return mol
Example #22
0
def importAco( project, acoFile ):
    """Read Cyana acoFile
       ( 512 THR   PSI     116.0   148.0)
       Columns: residue number, residue name, angle name, lower bound,
       upper bound.
       convention = CYANA or CYANA2
       Lines whose residue or angle cannot be resolved are skipped; only
       the first maxErrorCount atom-decode failures are reported.
       return a DihedralRestraintList or None on error
    """
    maxErrorCount = 50  # cap on individually reported decode failures
    errorCount = 0
    # check the molecule
    if not project or not project.molecule:
        nTerror("importAco: initialize molecule first")
        return None
    #end if
    molecule = project.molecule
    # Sometimes set from other than CYANA coordinate file.
#    chainId = molecule.chains[0].name

    if not os.path.exists( acoFile ):
        nTerror('importAco: file "%s" not found\n', acoFile)
        return None
    #end if

    _dir, name, _ext = nTpath( acoFile )
    result     = project.dihedrals.new( name=name, status='keep')
    resNumDict = molecule.getResNumDict()

    nTmessage("Now reading: " + acoFile)
    for line in AwkLike( acoFile, commentString = '#', minNF = 5 ):

        resNum = line.int(1)
        res    = resNumDict.get(resNum)

        angle  = line.dollar[3]
        lower  = line.float(4)
        upper  = line.float(5)
        # Only accept angles that the residue's database entry defines.
        if res and angle in res.db:
            atoms = translateTopology( res, res.db[angle].atoms )
            if None in atoms:
                if errorCount <= maxErrorCount:
                    nTerror("Failed to decode all atoms from line:" + line.dollar[0])
                if errorCount == (maxErrorCount + 1):
                    nTerror("And so on")
                errorCount += 1
                continue
            #end if
            r = DihedralRestraint( atoms = atoms, lower = lower, upper = upper,
                                   angle = angle, residue = res )
            result.append( r )
        #end if
    #end for

    if errorCount:
        # BUG FIX: the message previously said "upl file" -- a copy-paste
        # leftover from importUpl; this function imports an aco file.
        nTerror("Found number of errors importing aco file: %s" % errorCount)
    nTmessage('==> importAco: new %s from "%s"', result, acoFile )
    return result
Example #23
0
    def importPeaks(self, molecule, peakFile, status='keep'):
        """Read Xeasy peak file
           returns a PeaksList instance or None on error

           JFD: description of XEASY peak list format:
  43   1.760   3.143 1 T          0.000e+00  0.00e+00 -   0 2260 2587 0
  46   1.649   4.432 1 T          1.035e+05  0.00e+00 r   0 2583 2257 0
   ^ peak id                      ^ height
       ^ chemical shifts                     ^ height dev   ^ resonance ids
                     ^ ?                              ^ ?             ^ ?
                       ^ ?                                ^ ?

        resonance id is zero for unassigned.
        """
        #print '>>', molecule, peakFile

        # Map the xeasy atom numbering (self.prot) onto this molecule first.
        self.map2molecule(molecule)

        _path, name, _ext = nTpath(peakFile)
        peaks = PeakList(name=name, status=status)

        dimension = 0
        # f stands for field.
        for f in AwkLike(peakFile):
            # First line with exactly 5 fields carries the dimensionality
            # in its last field -- presumably the '# Number of dimensions'
            # header; verify against real xeasy files.
            if (f.NR == 1 and f.NF == 5):
                dimension = f.int(5)

            elif (not f.isComment('#')):
                #                 if (f.NF == 12):
                #                     dimension = 2
                #                 elif (f.NF == 13 or f.NF == 14 or (f.NF>):
                #                     dimension = 3
                #                 else:
                #                     nTerror('Xeasy.importPeaks: invalid number of fields (%d) in file "%s" on line %d (%s)',
                #                              f.NF, peakFile, f.NR, f.dollar[0]
                #                            )
                #                     return None
                #                 #end if
                if not dimension:
                    nTerror(
                        'Xeasy.importPeaks: invalid dimensionality in file "%s" (line %d, "%s")'
                        % (peakFile, f.NR, f.dollar[0]))
                    return None
                #end if

                # 'cur' is a 1-based field cursor walked across the line.
                cur = 1

                # preserve the Xeasy peak id
                peakId = f.int(cur)
                if (peakId == None):
                    return None
                cur += 1

                # One chemical-shift position per dimension.
                peakpos = []
                for _i in range(X_AXIS, dimension):
                    p = f.float(cur)
                    if (p == None):
                        return None
                    peakpos.append(p)
                    cur += 1
                #end if

                cur += 2  # skip two fields
                height = f.float(cur)
                if height == None:
                    return None
                cur += 1
                heightError = f.float(cur)
                if heightError == None:
                    return None
                cur += 1

                # One resonance id per dimension; resolve each through the
                # xeasy prot mapping (None when unassigned or unmapped).
                resonances = []
                error = 0
                cur += 2  # skip two fields
                for _i in range(X_AXIS, dimension):
                    aIndex = f.int(cur)
                    if aIndex == None:
                        return None
                    cur += 1
                    # 0 means unassigned according to Xeasy convention
                    if aIndex == 0:
                        resonances.append(None)
                    else:
                        if not aIndex in self.prot:
                            nTerror(
                                'Xeasy.importPeaks: invalid atom id %d on line %d (%s)',
                                aIndex, f.NR, f.dollar[0])
                            error = 1
                            break
                        else:
                            atom = self.prot[aIndex].atom
                            if atom != None:
                                resonances.append(atom.resonances())
                            else:
                                resonances.append(None)
                        #end if
                    #end if
                #end for

                # Only peaks whose resonance ids all resolved are kept.
                if not error:
                    peak = Peak(
                        dimension=dimension,
                        positions=peakpos,
                        height=height,
                        heightError=heightError,
                        resonances=resonances,
                    )
                    # store original peak id
                    peak.xeasyIndex = peakId
                    peaks.append(peak)
                #end if
            #end if
        #end for

        nTmessage('Xeasy.importPeaks: extracted %d peaks from %s', len(peaks),
                  peakFile)
        #end if

        return peaks