def restraintOverview(entryNames, extractDir=None):
    """Make overview of restraint data available, and check data type.

    For each entry, the directory
    ``<allDataDir>/<entryName[1:3]>/<entryName>/<entryName>.input/restraints``
    is listed.  When it holds exactly one file, that file is extracted
    (unless the target directory already exists) and the extracted data are
    passed to ``checkRestraintTypes``.  Entries with no file or with several
    files are only reported on stdout.

    Args:
        entryNames: iterable of entry name strings.
        extractDir: directory below which restraint archives are extracted.
            When falsy, a new temporary directory is created under
            ``casdConstants.topTmpDir``.
            NOTE(review): that temporary directory is not removed here.

    Returns:
        dict mapping each entry name to an info dict.  The info dict holds
        the value returned by ``checkRestraintTypes`` under the key
        ``'convertType'`` for entries with exactly one restraint file, and
        is empty otherwise.
    """
    result = {}
    if not extractDir:
        extractDir = tempfile.mkdtemp(dir=casdConstants.topTmpDir)

    for entryName in entryNames:
        result[entryName] = info = {}
        inputDir = os.path.join(allDataDir, entryName[1:3], entryName,
                                entryName + '.input', 'restraints')
        fileNames = os.listdir(inputDir)

        if not fileNames:
            print('%s NONE' % (entryName,))
            continue
        if len(fileNames) != 1:
            print('%s ERROR, multifiles %s' % (entryName, fileNames))
            continue

        # Exactly one restraint archive: extract it (once) and check it.
        source = os.path.join(inputDir, fileNames[0])
        targetDir = os.path.join(extractDir, entryName[1:3], entryName)
        if not os.path.exists(targetDir):
            casdUtil.extractCompressedFile(source, targetDir, entryName)
        checkDir = casdUtil.getLowestSubDir(targetDir,
                                            followDirs=('cns_format', ))
        # Keep the check result; previously it was computed and discarded.
        info['convertType'] = checkRestraintTypes(checkDir, entryName)

    return result
def makeCcpnProject(entryName):
    """Execute conversion to CCPN project.

    Loads the original ('<prefix>_Org') CCPN project for the entry, reads the
    entry's structure and restraint input files into it, then saves and
    packages the result under ``<allDataDir>/<entryName[1:3]>/<entryName>``.

    Args:
        entryName: entry name string; the part before the first '_' selects
            the matching '<prefix>_Org' original project.

    Returns:
        Path of the packaged project (``<output dir>.tgz``) on success,
        ``None`` on failure.  Failures are reported, with traceback, on
        stdout rather than raised.
    """
    logFileHandle = None
    try:
        print('Starting %s' % entryName)
        logFileHandle = casdUtil.createLogFile(entryName, 'extractEntry')

        ccpnProject = _loadOriginalCcpnProject(entryName)

        # neutralize any other pending CASD-NMR projects
        casdRun = casdUtil.prepareNmrCalcRun(ccpnProject, 'CING')
        for run in casdRun.nmrCalcStore.findAllRuns(status='pending'):
            if run is not casdRun:
                run.status = 'provisional'

        # Bound explicitly here: the output path below depends on it.
        dataDir = os.path.join(allDataDir, entryName[1:3], entryName)
        tmpdir = tempfile.mkdtemp(dir=casdConstants.topTmpDir)
        try:
            structureDir, structureFiles = _extractStructureFiles(
                entryName, tmpdir)
            restraintDir, restraintFiles = _extractRestraintFiles(
                entryName, tmpdir)

            # The FormatConverter wrapper is used for restraint reading;
            # structures are always read through StructureIo.
            fcw = FormatConverterWrapper(ccpnProject=ccpnProject)

            if structureFiles:
                _readStructures(ccpnProject, casdRun, entryName,
                                structureDir, structureFiles)
            else:
                print('### Skipping %s, no structure file' % entryName)

            _addShiftList(casdRun)

            if restraintFiles:
                _readRestraints(fcw, casdRun, restraintDir, restraintFiles)
        finally:
            shutil.rmtree(tmpdir, ignore_errors=True)

        # rename and package project
        ccpnOutputDir = os.path.join(dataDir, entryName)
        genIo.saveProject(ccpnProject, newPath=ccpnOutputDir,
                          newProjectName=entryName, checkValid=True,
                          removeExisting=True)
        genIo.packageProject(ccpnProject, ccpnOutputDir)
        # Only the packaged archive is kept; drop the unpacked copy.
        shutil.rmtree(ccpnOutputDir)
        ccpnOutputPath = ccpnOutputDir + '.tgz'
        print('SUCCESS, %s saved to %s' % (entryName, ccpnOutputPath))
        return ccpnOutputPath

    except Exception:
        # Best-effort per-entry conversion: report and carry on.
        # KeyboardInterrupt/SystemExit are deliberately not caught.
        print('ERROR for %s' % entryName)
        traceback.print_exc(file=sys.stdout)
        return None

    finally:
        if logFileHandle is not None:
            logFileHandle.close()
        # presumably createLogFile redirects stdout; restore it either way
        sys.stdout = sys.__stdout__


def _loadOriginalCcpnProject(entryName):
    """Load the '<prefix>_Org' CCPN project, extracting its .tgz if needed."""
    orgName = entryName.split('_')[0] + '_Org'
    path = os.path.join(allDataDir, orgName[1:3], orgName)
    projectPath = os.path.join(path, orgName)
    archivePath = projectPath + '.tgz'

    if not (os.path.exists(projectPath) or os.path.exists(archivePath)):
        raise Exception("NO original CCPN project in %s" % path)
    if not os.path.isdir(projectPath):
        casdUtil.extractCompressedFile(archivePath, path, entryName)
    if not os.path.exists(projectPath):
        raise Exception("NO extracted CCPN project in %s" % path)

    return genIo.loadProject(projectPath, suppressGeneralDataDir=True)


def _extractStructureFiles(entryName, tmpdir):
    """Extract the structure input; return (directory, file names)."""
    target = os.path.join(tmpdir, 'structures')
    src = casdUtil.getInputFile(entryName, 'structures')
    casdUtil.extractCompressedFile(src, target, entryName, okExts=('pdb', ))
    target = casdUtil.getLowestSubDir(target, followDirs=('cns_format', ))
    return target, os.listdir(target)


def _extractRestraintFiles(entryName, tmpdir):
    """Extract the restraint input; return (directory, file names).

    The file names are an empty tuple when the entry has no restraint input.
    """
    target = os.path.join(tmpdir, 'restraints')
    src = casdUtil.getInputFile(entryName, 'restraints', ignoreErrors=True)
    if not src:
        print('WARNING, %s no restraints found at %s' % (entryName, src))
        return target, ()

    casdUtil.extractCompressedFile(src, target, entryName)
    target = casdUtil.getLowestSubDir(target, followDirs=('cns_format', ))
    return target, os.listdir(target)


def _readStructures(ccpnProject, casdRun, entryName, structureDir,
                    structureFiles):
    """Read the pdb files into one ensemble and attach it to casdRun."""
    pdbEndings = tuple(casdConstants.pdbEndings)
    pdbFiles = [x for x in structureFiles if x.endswith(pdbEndings)]
    floatFiles = [x for x in structureFiles if x.endswith('.float')]
    if floatFiles:
        # Use only pdb files with names that match float files
        stems = set(x[:-6] for x in floatFiles)
        pdbFiles = [x for x in pdbFiles if x[:-4] in stems]
    pdbPaths = [os.path.join(structureDir, x) for x in pdbFiles]

    # NOTE: the FormatConverter-based structure reader was unreachable
    # (guarded by 'if True') and has been dropped; StructureIo is used.
    ensemble = StructureIo.getStructureFromFiles(
        ccpnProject.findFirstMolSystem(), pdbPaths)
    if ensemble is None:
        print('### Skipping %s, no structures loaded' % entryName)
    else:
        print('### num files, ensemble %s %s'
              % (len(pdbPaths), ensemble.ensembleId))
        casdRun.newStructureEnsembleData(name=entryName,
                                         structureEnsemble=ensemble)


def _addShiftList(casdRun):
    """Make the NmrCalc object for the shift list, if there is exactly one."""
    # NBNB consider later: if we are reading in assigned peaks,
    # shifts may change.
    shiftLists = casdRun.nmrCalcStore.nmrProject.findAllMeasurementLists(
        className='ShiftList')
    if len(shiftLists) == 1:
        casdRun.newMeasurementListData(name='Shiftlist',
                                       measurementList=shiftLists.pop())
    else:
        print('WARNING. %s shift lists found, should be 1' % len(shiftLists))


def _readRestraints(fcw, casdRun, restraintDir, restraintFiles):
    """Read the restraint files through fcw, attach them, link resonances."""
    acceptedTypes = ('distanceConstraints', 'dihedralConstraints',
                     'rdcConstraints')
    restraintLists = []
    for rfile in restraintFiles:
        rpath = os.path.join(restraintDir, rfile)
        fileInfo = fcw.determineFileInfo(rpath)
        dataType = fileInfo.get('dataType')
        formatName = fileInfo.get('formatName')

        if dataType is None or formatName is None:
            print('Skipping unidentified restraint file %s %s %s'
                  % (dataType, formatName, rfile))
            continue
        if dataType not in acceptedTypes:
            print('Skipping wrong type of restraint file %s %s %s'
                  % (dataType, formatName, rfile))
            continue

        print('Reading restraint file %s %s %s'
              % (dataType, formatName, rfile))
        fcw.readFile(dataType, formatName, rpath)
        if fcw.conversionSuccess:
            print("Successful restraint file read:\n%s" % fcw.conversionInfo)
            restraintLists.append(fcw.ccpnObjectOrList)
        else:
            print("Failed restraint file read:\n%s" % fcw.conversionInfo)

    if restraintLists:
        print("Found restraint lists: %s" % len(restraintLists))
        casdRun.newConstraintStoreData(constraintLists=restraintLists,
                                       name='Restraintlists')

    # linkResonances
    print('### linking resonances')
    fcw.linkAllResonancesToAtoms()