def run(self, refPdb=None, inPdb=None, outPdb=None, connectivityRadius=None, originHand=True, cleanup=False):
    """Run ``csymmatch`` on ``inPdb`` using ``refPdb`` as the reference.

    Parameters
    ----------
    refPdb : str
        Path passed to csymmatch as ``-pdbin-ref``.
    inPdb : str
        Path passed to csymmatch as ``-pdbin``.
    outPdb : str
        Path passed to csymmatch as ``-pdbout``; also the stem of the log file name.
    connectivityRadius : number or str, optional
        When truthy, passed as the ``-connectivity-radius`` value.
    originHand : bool
        When true, the ``-origin-hand`` flag is added.
    cleanup : bool
        When true, the log file is deleted after a successful run.

    Raises
    ------
    RuntimeError
        If csymmatch returns a non-zero exit code.
    """
    self._reset()
    # The uuid1 suffix gives every call its own log file name
    self.logfile = outPdb + "_{}.log".format(str(uuid.uuid1()))
    cmd = ['csymmatch', "-pdbin-ref", refPdb, "-pdbin", inPdb, "-pdbout", outPdb]
    if originHand:
        cmd += ["-origin-hand"]
    if connectivityRadius:
        # Every argv entry must be a string: a numeric radius would otherwise
        # break both the command execution and the " ".join() below
        cmd += ["-connectivity-radius", str(connectivityRadius)]
    retcode = ample_util.run_command(cmd=cmd, logfile=self.logfile, dolog=False)
    if retcode != 0:
        raise RuntimeError("Error running command: {0}".format(" ".join(cmd)))
    if cleanup:
        os.unlink(self.logfile)
def align_mustang(models, mustang_exe=None, work_dir=None):
    """Align ``models`` with mustang and return the path to the fasta alignment.

    Parameters
    ----------
    models : list of str
        Model paths appended to the mustang command line after ``-i``.
    mustang_exe : str
        Path to the mustang executable.
    work_dir : str, optional
        Directory to run in (created if missing); defaults to the current directory.

    Returns
    -------
    str
        Path to ``mustang.afasta`` inside ``work_dir``.

    Raises
    ------
    RuntimeError
        If the executable cannot be found, mustang fails, or no alignment
        file is produced.
    """
    if not ample_util.is_exe(mustang_exe):
        msg = "Cannot find mustang executable: {0}".format(mustang_exe)
        raise RuntimeError(msg)

    owd = os.getcwd()
    if not work_dir:
        work_dir = owd
    work_dir = os.path.abspath(work_dir)
    if not os.path.isdir(work_dir):
        os.mkdir(work_dir)
    os.chdir(work_dir)
    try:
        logfile = os.path.join(work_dir, 'mustang.log')
        basename = 'mustang'
        cmd = [mustang_exe, '-F', 'fasta', '-o', basename, '-i'] + models
        rtn = ample_util.run_command(cmd, logfile=logfile, directory=work_dir)
        if not rtn == 0:
            msg = "Error running mustang. Check logfile: {0}".format(logfile)
            raise RuntimeError(msg)

        alignment_file = os.path.join(work_dir, basename + ".afasta")
        if not os.path.isfile(alignment_file):
            msg = "Could not find alignment file: {0} after running mustang!".format(alignment_file)
            raise RuntimeError(msg)
    finally:
        # Always go back to the original directory, including on failure
        os.chdir(owd)
    return alignment_file
def generateMap(mtz, pdb, FP='FP', SIGFP='SIGFP', FREE='FREE', directory=None):
    """Generate a map from an mtz file and a pdb by running ``refmac5`` for zero cycles.

    Parameters
    ----------
    mtz : str
        Existing MTZ file, passed to refmac5 as ``HKLIN``.
    pdb : str
        Existing PDB file, passed to refmac5 as ``XYZIN``.
    FP, SIGFP, FREE : str
        Column labels substituted into the ``LABIN`` keyword line.
    directory : str, optional
        Directory for the outputs and log; defaults to the current directory.

    Returns
    -------
    str
        Absolute path of the file written as ``HKLOUT``.

    Raises
    ------
    AssertionError
        If either input file is missing or refmac5 returns non-zero.
    """
    # Raised explicitly (same exception type as before) so that the checks
    # are not stripped when Python runs with -O
    if not (os.path.isfile(mtz) and os.path.isfile(pdb)):
        raise AssertionError("Cannot find files: {0} {1}".format(mtz, pdb))

    if not directory:
        directory = os.getcwd()

    mapFile = ample_util.filename_append(filename=mtz, astr="map", directory=directory)
    mapFile = os.path.abspath(mapFile)
    mapPdb = ample_util.filename_append(filename=pdb, astr="map", directory=directory)

    cmd = ["refmac5", "HKLIN", mtz, "HKLOUT", mapFile, "XYZIN", pdb, "XYZOUT", mapPdb]

    # FIX FOR DIFFERENT FP etc.
    # One refmac keyword per line
    stdin = (
        "RIDG DIST SIGM 0.02\n"
        "LABIN FP={0} SIGFP={1} FREE={2}\n"
        "MAKE HYDR N\n"
        "WEIGHT MATRIX 0.01\n"
        "NCYC 0\n"
        "END\n"
    ).format(FP, SIGFP, FREE)

    logfile = os.path.join(directory, "generateMap.log")
    ret = ample_util.run_command(cmd=cmd, logfile=logfile, dolog=True, stdin=stdin)
    if ret != 0:
        raise AssertionError("generateMap refmac failed-check log: {0}".format(logfile))
    return mapFile
def runNcont(self, pdbin=None, sourceChains=None, targetChains=None, maxDist=1.5, allAtom=False):
    """Run ``ncont`` on ``pdbin`` to list contacts between two sets of chains.

    Parameters
    ----------
    pdbin : str
        PDB file passed to ncont as ``xyzin``; also the stem of the log name.
    sourceChains, targetChains : iterable of str
        Chain identifiers, comma-joined into the ``source`` / ``target`` selections.
    maxDist : float
        Value written on the ``maxdist`` keyword line.
    allAtom : bool
        When true select all atoms (``*``), otherwise only ``CA`` atoms.

    Raises
    ------
    RuntimeError
        If ncont returns a non-zero exit code.

    The log path is stored on ``self.ncontLog``.
    """
    log_suffix = ".ncont_aa.log" if allAtom else ".ncont_rio.log"
    self.ncontLog = pdbin + log_suffix

    # Need to use list of chains from Native as can't work out negate operator for ncont
    atom_selection = "*" if allAtom else "CA"
    keyword_lines = [
        "source {0}//{1}".format(",".join(sourceChains), atom_selection),
        "target {0}//{1}".format(",".join(targetChains), atom_selection),
        "maxdist {0}".format(maxDist),
        "cells 2",
        "sort target inc",
    ]
    stdin = "\n".join(keyword_lines) + "\n"

    cmd = ["ncont", "xyzin", pdbin]
    status = ample_util.run_command(cmd=cmd, logfile=self.ncontLog, directory=os.getcwd(),
                                    dolog=True, stdin=stdin)
    if status == 0:
        return
    raise RuntimeError("Error running ncont command: {0}\nCheck log: {1}".format(cmd, self.ncontLog))
def ccmtzOrigin(nativeMap, mrPdb):
    """Use the phenix get_cc_mtz_pdb script to determine the origin of a MR pdb using the supplied map.

    Parameters
    ----------
    nativeMap : str
        Map file passed as the first argument to ``phenix.get_cc_mtz_pdb``.
    mrPdb : str
        Molecular replacement PDB passed as the second argument.

    Returns
    -------
    list of float
        The three values from the ``OFFSET`` line of
        ``temp_dir/resolve.offset``, each multiplied by -1.

    Raises
    ------
    AssertionError
        If the temporary file name is too long, the program returns
        non-zero, or the offset file does not start with ``OFFSET``.
    """
    import tempfile  # local import: only needed for the long-filename workaround

    # resolve can only handle file names < 75 characters so we need to truncate
    # We copy the file rather than symlink so that this works on windows and then delete afterwards
    tempnam = None
    if len(os.path.basename(mrPdb)) >= 75:
        # mkstemp replaces os.tempnam(), which is insecure and was removed in Python 3.
        # The .pdb extension is required or the program doesn't work.
        fd, tempnam = tempfile.mkstemp(suffix=".pdb")
        os.close(fd)
        if len(tempnam) >= 75:
            os.unlink(tempnam)
            raise AssertionError("Temporary file name is too long: {0}".format(tempnam))
        shutil.copy(mrPdb, tempnam)
        mrPdb = tempnam

    try:
        # make sure we can find the program
        get_cc_mtz_pdb = ample_util.find_exe('phenix.get_cc_mtz_pdb')
        cmd = [get_cc_mtz_pdb, nativeMap, mrPdb]
        ret = ample_util.run_command(cmd=cmd, logfile="get_cc_mtz_pdb.log", dolog=False)
        if ret != 0:
            raise AssertionError("phenix.get_cc_mtz_pdb failed!")

        ofile = "temp_dir/resolve.offset"
        with open(ofile) as o:
            fields = o.readline().strip().split()
        if not fields or fields[0] != "OFFSET":
            raise AssertionError("Unexpected content in {0}: {1}".format(ofile, fields))
        origin = [float(fields[1]) * -1, float(fields[2]) * -1, float(fields[3]) * -1]
    finally:
        # remove temp file if we created it, even when something above failed
        if tempnam:
            os.unlink(tempnam)
    return origin
def analyse(self, mr_pdb, cleanup=True):
    """Use SHELXE to analyse an MR pdb file to determine the origin shift and phase error

    This function sets the ``MPE``, ``wMPE`` and ``originShift`` attributes.

    Parameters
    ----------
    mr_pdb : str
        Path to the Molecular Replacement PDB file
    cleanup : bool
        When true, remove the SHELXE work files and the log afterwards.

    Raises
    ------
    RuntimeError
        If SHELXE returns a non-zero exit code.

    Notes
    -----
    Changes the process working directory to ``self.work_dir`` and does not
    change it back.
    """
    os.chdir(self.work_dir)
    input_pdb = self.stem + ".pda"
    shutil.copyfile(mr_pdb, os.path.join(self.work_dir, input_pdb))
    cmd = [self.shelxe_exe, input_pdb, '-a0', '-q', '-s0.5', '-o', '-n', '-t0', '-m0', '-x']
    logfile = os.path.abspath('shelxe_{}.log'.format(str(uuid.uuid1())))
    ret = ample_util.run_command(cmd=cmd, logfile=logfile, directory=None, dolog=False, stdin=None)
    if ret != 0:
        raise RuntimeError("Error running shelxe - see log: {0}".format(logfile))

    sp = parse_shelxe.ShelxeLogParser(logfile)
    # Only added in later version of MRBUMP shelxe parser
    # NOTE(review): assumes only the MPE assignment is guarded by hasattr - confirm
    if hasattr(sp, 'MPE'):
        self.MPE = sp.MPE
    self.wMPE = sp.wMPE
    if isinstance(sp.originShift, list):
        self.originShift = [o * -1 for o in sp.originShift]

    if cleanup:
        for ext in ['.hkl', '.ent', '.pda', '.pdo', '.phs', '.lst', '_trace.ps']:
            try:
                os.unlink(self.stem + ext)
            except OSError:
                # Best effort: not every file is necessarily produced
                pass
        os.unlink(logfile)
def add_sidechains(self, pdbin=None, pdbout=None, sequence=None, hydrogens=False, strip_oxt=False):
    """Add the specified sidechains to the pdb by running Scwrl.

    Parameters
    ----------
    pdbin : str
        Input PDB, passed as ``-i``.
    pdbout : str
        Final output PDB path.
    sequence : str, optional
        When given, written to ``sequence.file`` in ``self.workdir`` and
        passed as ``-s``.
    hydrogens : bool
        When false, ``-h`` is added so hydrogens are not output.
    strip_oxt : bool
        When true, Scwrl writes to an intermediate file and the OXT atoms
        are stripped from it to produce ``pdbout``.

    Returns
    -------
    str
        Absolute path of ``pdbout``.

    Raises
    ------
    RuntimeError
        If Scwrl returns a non-zero exit code.
    """
    _pdbout = pdbout
    if strip_oxt:
        _pdbout = pdbout + "_OXT"

    cmd = [self.scwrl_exe, "-i", pdbin, "-o", _pdbout]

    # Not needed by default
    if sequence is not None:
        sequenceFile = os.path.join(self.workdir, "sequence.file")
        with open(sequenceFile, 'w') as w:
            # Text mode already translates "\n" to the platform line ending;
            # writing os.linesep here would give "\r\r\n" on Windows
            w.write(sequence + "\n")
        cmd += ["-s", sequenceFile]

    # Don't output hydrogens
    if not hydrogens:
        cmd += ['-h']

    logfile = os.path.abspath("scwrl.log")
    retcode = ample_util.run_command(cmd, logfile=logfile)
    if retcode != 0:
        raise RuntimeError("Error running Scwrl - please check the logfile: {0}".format(logfile))
    os.unlink(logfile)

    if strip_oxt:
        # Remove all OXT atoms
        pdb_edit.strip(_pdbout, pdbout, atom_types=['OXT'])
        os.unlink(_pdbout)

    return os.path.abspath(pdbout)
def _sfcif2mtz(self, cifPath, mtzPath):
    """Convert a structure-factor CIF file to MTZ format with ``cif2mtz``.

    Parameters
    ----------
    cifPath : str
        Input CIF, passed as ``hklin``.
    mtzPath : str
        Output MTZ, passed as ``hklout``.

    Raises
    ------
    RuntimeError
        If ``cif2mtz`` returns a non-zero exit code.
    """
    command = ["cif2mtz", "hklin", cifPath, "hklout", mtzPath]
    log_path = os.path.join(os.getcwd(), "cif2mtz.log")
    # Need empty stdin to trigger eof to get program to run
    status = ample_util.run_command(command, stdin="", logfile=log_path)
    if status == 0:
        return
    raise RuntimeError("Error running sfcif2mtz. Check the logfile: {0}".format(log_path))
def calc_rmsd(self, model1, model2, nresidues=None, logfile='lsqkab.out', purge=False):
    """Run ``lsqkab`` on two models and return the parsed RMSD.

    Parameters
    ----------
    model1 : str
        PDB passed as ``XYZINM``; also used to count residues when
        ``nresidues`` is not given.
    model2 : str
        PDB passed as ``XYZINF``.
    nresidues : int, optional
        Upper residue number for the CA fit (residues 1..nresidues of chain A).
    logfile : str
        File the lsqkab output is written to and parsed from.
    purge : bool
        When true, delete ``logfile`` and the ``RMSTAB`` file afterwards.

    Returns
    -------
    The value returned by ``self.parse_lsqkab_output(logfile)``.

    Raises
    ------
    RuntimeError
        If lsqkab returns a non-zero exit code.
    """
    if not nresidues:
        _, nresidues = pdb_edit.num_atoms_and_residues(model1, first=True)

    # One lsqkab keyword per line
    stdin = "\n".join([
        "FIT RESIDUE CA 1 TO {0} CHAIN {1}",
        "MATCH 1 to {0} CHAIN {1}",
        "output RMS",
        "end",
    ]).format(nresidues, 'A')

    cmd = ['lsqkab', 'XYZINM', model1, 'XYZINF', model2]
    retcode = ample_util.run_command(cmd, logfile=logfile, stdin=stdin)
    if retcode != 0:
        # Fail with a pointer to the log rather than parsing a failed run
        raise RuntimeError("Error running lsqkab - check logfile: {0}".format(logfile))
    rmsd = self.parse_lsqkab_output(logfile)

    # cleanup
    if purge:
        os.unlink(logfile)
        os.unlink('RMSTAB')
    return rmsd
def generate_distance_matrix(self, pdb_list):
    """Run fast_protein_cluster to build a symmetric pairwise distance matrix.

    Parameters
    ----------
    pdb_list : list of str
        Paths of the PDB files to compare.

    Sets ``self.index2pdb`` (sorted list of paths) and
    ``self.distance_matrix`` (square numpy array, zero where no value was
    read).

    Raises
    ------
    RuntimeError
        If fast_protein_cluster returns a non-zero exit code.
    """
    # Index is just the order of the pdb in the file, so the file must be
    # written in the same (sorted) order that index2pdb records
    self.index2pdb = sorted(pdb_list)

    # Create list of pdb files
    fname = os.path.join(os.getcwd(), "files.list")
    with open(fname, 'w') as f:
        f.write("\n".join(self.index2pdb) + "\n")

    # Run fast_protein_cluster - this is just to generate the distance matrix, but there
    # doesn't seem to be a way to stop it clustering as well - not a problem as it just
    # generates more files
    log_name = os.path.abspath("fast_protein_cluster.log")
    matrix_file = "fpc.matrix"
    cmd = [self.executable, "--cluster_write_text_matrix", matrix_file, "-i", fname]
    retcode = ample_util.run_command(cmd, logfile=log_name)
    if retcode != 0:
        raise RuntimeError("non-zero return code for fast_protein_cluster in generate_distance_matrix!\nCheck logfile:{0}".format(log_name))

    mlen = 0
    data = []
    with open(matrix_file) as f:
        for line in f:
            fields = line.strip().split()
            x = int(fields[0])
            y = int(fields[1])
            d = float(fields[2])
            # +1 as we want the length; consider both indices so a file that
            # only lists x < y pairs still gives a large enough matrix
            mlen = max(mlen, x + 1, y + 1)
            data.append((x, y, d))

    # Zero-filled square matrix to make indexing easier as we're unlikely to be very big
    m = numpy.zeros([mlen, mlen])

    # Fill both triangles at once; the last value read for a pair wins
    for i, j, d in data:
        m[i][j] = d
        m[j][i] = d

    self.distance_matrix = m
    return
def test_gesamt_matrix_generic(self):
    """Compare one entry of the generic gesamt score matrix with a direct gesamt run."""
    # Test we can reproduce the original thresholds
    exe = ample_util.find_exe("gesamt" + ample_util.EXE_EXT)
    clusterer = subcluster.GesamtClusterer(executable=exe)
    model_glob = os.path.join(self.testfiles_dir, "models", '*.pdb')
    pdb_list = sorted(glob.glob(model_glob))
    clusterer._generate_distance_matrix_generic(pdb_list, purge_all=True)

    # Test two files manually
    index1, index2 = 2, 25
    first_pdb = pdb_list[index1]
    second_pdb = pdb_list[index2]

    # Run gesamt to get the score between the two
    logfile = 'gesamt.log'
    ample_util.run_command([exe, first_pdb, second_pdb], logfile=logfile)

    qscore = None
    with open(logfile) as log:
        for text in log:
            if text.startswith(' Q-score'):
                qscore = float(text.split()[2])
    self.assertIsNotNone(qscore, "No q-score found")

    # read score matrix, skipping blank lines
    with open(subcluster.SCORE_MATRIX_NAME) as handle:
        rows = [text.split() for text in handle if text.strip()]
    matrix = [(int(r[0]), int(r[1]), float(r[2])) for r in rows]

    # Make sure the score matches
    for i, j, score in matrix:
        if i == index1 and j == index2:
            # Gesamt log and out file formats have different precisions
            self.assertAlmostEqual(score, qscore, 3, "Q-scores differ: {0} - {1}".format(score, qscore))

    for leftover in (logfile, subcluster.SCORE_MATRIX_NAME, subcluster.FILE_LIST_NAME):
        os.unlink(leftover)
    return
def _cluster(self, models, run_dir=None, score_type='rmsd', score_matrix=None, nproc=1):
    """Run spicker to cluster the models.

    Parameters
    ----------
    models : list
        Models passed to ``self.create_input_files``.
    run_dir : str, optional
        Directory to run in; falls back to ``self.run_dir`` and then to a
        ``spicker`` directory under the current directory.
    score_type : str
        Stored on ``self.score_type`` and passed to ``create_input_files``.
    score_matrix : optional
        Passed to ``create_input_files``.
    nproc : int
        Exported to spicker as ``OMP_NUM_THREADS``.

    The parsed results are stored on ``self.results``.

    Raises
    ------
    RuntimeError
        If spicker returns a non-zero exit code.
    """
    owd = os.getcwd()
    if run_dir:
        self.run_dir = os.path.abspath(run_dir)
    if not self.run_dir:
        self.run_dir = os.path.join(owd, 'spicker')
    if not os.path.isdir(self.run_dir):
        os.mkdir(self.run_dir)
    os.chdir(self.run_dir)
    try:
        logger.debug("Running spicker with score_type {0} in directory: {1}".format(score_type, self.run_dir))
        logger.debug("Using executable: {0} on {1} processors".format(self.spicker_exe, nproc))

        self.score_type = score_type
        self.create_input_files(models, score_type=score_type, score_matrix=score_matrix)

        # We need special care if we are running with tm scores as we will be using the OPENMP
        # version of spicker which requires increasing the stack size on linux and setting the
        # OMP_NUM_THREADS environment variable on all platforms
        # The stack size on 64-bit linux seems to be 15Mb, so I guess asking for 50 seems reasonable
        # I'm assuming that the limit is in bytes and specified by an integer so 50Mb -> 50000000
        preexec_fn = None
        env = {'OMP_NUM_THREADS': str(nproc)}
        if sys.platform.lower().startswith('linux'):
            def set_stack():
                import resource
                stack_bytes = 50000000  # 50Mb
                resource.setrlimit(resource.RLIMIT_STACK, (stack_bytes, stack_bytes))
            preexec_fn = set_stack

        logfile = os.path.abspath("spicker.log")
        rtn = ample_util.run_command([self.spicker_exe], logfile=logfile, env=env, preexec_fn=preexec_fn)
        if not rtn == 0:
            raise RuntimeError("Error running spicker, check logfile: {0}".format(logfile))

        # Read the log and generate the results
        self.results = self.process_log()
    finally:
        # Always go back to where we started, including on failure
        os.chdir(owd)
    return
def preparePlacedPdb(self, placedPdb=None, placedChainID=None, nativeChainID=None, resSeqMap=None):
    """Use pdbcur to:
    - extract chain to compare
    - strip down to CA/BB
    - remove any atoms that cannot be compared to the native

    Parameters
    ----------
    placedPdb : str
        Input PDB, passed to pdbcur as ``xyzin``.
    placedChainID : str
        Chain to keep.
    nativeChainID : str
        Chain ID to rename the kept chain to, if different.
    resSeqMap : object
        Its ``targetIncomparable(bbMask=...)`` result lists the residues to delete.

    Returns
    -------
    str
        Path of the PDB file written by pdbcur in ``self.workdir``.

    Raises
    ------
    RuntimeError
        If pdbcur returns a non-zero exit code.
    """
    # Build up stdin
    # Extract the chain to compare
    stdin = "lvchain {0}\n".format(placedChainID)

    # Rename it to match the native
    if placedChainID != nativeChainID:
        stdin += "renchain {0} {1}\n".format(placedChainID, nativeChainID)

    # Find out if there are atoms in the model that we need to remove
    incomparable = resSeqMap.targetIncomparable(bbMask=not self.cAlphaOnly)
    if len(incomparable):
        # One delresidue keyword per residue rather than a single selection expression
        for e in incomparable:
            stdin += "delresidue {0}\n".format(e)

    if self.cAlphaOnly:
        # Strip down to CA
        stdin += 'lvatom "CA[C]:*"\n'
    else:
        # Strip down to backbone atoms
        stdin += 'lvatom "N,CA,C,O,CB[N,C,O]"\n'

    # Renumber?
    stdin += "sernum\n"

    # Name the output file accordingly
    astr = "chain{0}".format(placedChainID)
    placedChainPdb = ample_util.filename_append(filename=placedPdb, astr=astr, directory=self.workdir)

    # Now run pdbcur to do it all. The argv list is built directly so that
    # paths containing whitespace are not split into several arguments.
    cmd = ["pdbcur", "xyzin", placedPdb, "xyzout", placedChainPdb]
    logfile = "{0}.log".format(placedChainPdb)
    retcode = ample_util.run_command(cmd=cmd, logfile=logfile, directory=self.workdir, dolog=False, stdin=stdin)
    if retcode != 0:
        raise RuntimeError("Error extracting chain from placed PDB {0} in directory {1}".format(placedPdb, self.workdir))
    os.unlink(logfile)
    return placedChainPdb
def merge(pdb1=None, pdb2=None, pdbout=None):
    """Combine two PDB files into ``pdbout`` using ``pdb_merge``.

    Parameters
    ----------
    pdb1, pdb2 : str
        Input files, passed as ``xyzin1`` and ``xyzin2``.
    pdbout : str
        Output file, passed as ``xyzout``; the log is ``pdbout + ".log"``.

    Raises
    ------
    RuntimeError
        If pdb_merge returns a non-zero exit code (the log is kept).
    """
    log_path = pdbout + ".log"
    status = ample_util.run_command(
        cmd=['pdb_merge', 'xyzin1', pdb1, 'xyzin2', pdb2, 'xyzout', pdbout],
        logfile=log_path,
        directory=os.getcwd(),
        dolog=False,
        stdin='nomerge',
    )
    if status != 0:
        raise RuntimeError("Error merging pdbs: {0} {1}".format(pdb1, pdb2))
    # The log is only removed once the merge succeeded
    os.unlink(log_path)
def calpha_only(inpdb, outpdb):
    """Strip PDB to c-alphas only using pdbcur.

    Parameters
    ----------
    inpdb : str
        Input PDB, passed as ``xyzin``.
    outpdb : str
        Output PDB, passed as ``xyzout``; the log is ``outpdb + ".log"``.

    Raises
    ------
    RuntimeError
        If pdbcur returns a non-zero exit code (the log is kept).
    """
    logfile = outpdb + ".log"
    # Build the argv list directly so paths containing whitespace stay intact
    cmd = ["pdbcur", "xyzin", inpdb, "xyzout", outpdb]
    stdin = 'lvatom "CA[C]:*"'
    retcode = ample_util.run_command(cmd=cmd, logfile=logfile, directory=os.getcwd(), dolog=False, stdin=stdin)
    if retcode != 0:
        raise RuntimeError("Error stripping PDB to c-alpha atoms")
    os.unlink(logfile)
def analyse(self, mr_pdb):
    """Use SHELXE to analyse an MR pdb file to determine the origin shift and phase error

    This function sets the ``MPE``, ``wMPE`` and ``originShift`` attributes.

    Parameters
    ----------
    mr_pdb : str
        Path to the Molecular Replacement PDB file

    Raises
    ------
    RuntimeError
        If SHELXE returns a non-zero exit code.

    Notes
    -----
    Changes the process working directory to ``self.work_dir`` and does not
    change it back. The SHELXE work files and the log are always removed.
    """
    os.chdir(self.work_dir)
    input_pdb = self.stem + ".pda"
    shutil.copyfile(mr_pdb, os.path.join(self.work_dir, input_pdb))
    cmd = [self.shelxe_exe, input_pdb, '-a0', '-q', '-s0.5', '-o', '-n', '-t0', '-m0', '-x']
    logfile = os.path.abspath('shelxe_{}.log'.format(str(uuid.uuid1())))
    ret = ample_util.run_command(cmd=cmd, logfile=logfile, directory=None, dolog=False, stdin=None)
    if ret != 0:
        raise RuntimeError("Error running shelxe - see log: {0}".format(logfile))

    sp = parse_shelxe.ShelxeLogParser(logfile)
    # Only added in later version of MRBUMP shelxe parser
    # NOTE(review): assumes only the MPE assignment is guarded by hasattr - confirm
    if hasattr(sp, 'MPE'):
        self.MPE = sp.MPE
    self.wMPE = sp.wMPE
    # Only negate when a shift was actually parsed
    if isinstance(sp.originShift, list):
        self.originShift = [o * -1 for o in sp.originShift]

    for ext in ['.hkl', '.ent', '.pda', '.pdo', '.phs', '.lst', '_trace.ps']:
        try:
            os.unlink(self.stem + ext)
        except OSError:
            # Best effort: not every file is necessarily produced
            pass
    os.unlink(logfile)
def compareSingle(self, nativePdb=None, modelPdb=None, sequenceIndependant=True, rmsd=False, workdir=None):
    """Run maxcluster on a single native/model pair and return the parsed result.

    Parameters
    ----------
    nativePdb : str
        Passed to maxcluster as ``-e``.
    modelPdb : str
        Passed to maxcluster as ``-p``; also used to name the log file.
    sequenceIndependant : bool
        When true, add the ``-in`` flag.
    rmsd : bool
        When true, add ``-rmsd`` and parse the log with
        ``parseLogSingleRmsd``; otherwise ``parseLogSingleTm`` is used.
    workdir : str, optional
        Directory for the log; defaults to the current directory.

    Returns
    -------
    The value returned by the selected log parser.

    Notes
    -----
    A non-zero return code from maxcluster is only reported with a printed
    message; the log is still parsed.
    """
    self.workdir = workdir
    if not self.workdir:
        self.workdir = os.getcwd()

    cmd = [self.maxclusterExe, "-e", nativePdb, "-p", modelPdb]
    if sequenceIndependant:
        cmd.append("-in")
    if rmsd:
        cmd.append("-rmsd")

    logfile = ample_util.filename_append(filename=modelPdb, astr="maxcluster", directory=self.workdir)
    if rmsd:
        logfile = os.path.splitext(logfile)[0] + "_rmsd.log"
    else:
        logfile = os.path.splitext(logfile)[0] + ".log"
    self.maxclusterLogfile = logfile

    retcode = ample_util.run_command(cmd, logfile=self.maxclusterLogfile, dolog=False)
    if retcode != 0:
        msg = "non-zero return code for maxcluster in runMaxcluster!"
        # Call form of print is valid on both Python 2 and Python 3
        print(msg)

    if rmsd:
        data = self.parseLogSingleRmsd()
    else:
        data = self.parseLogSingleTm()
    return data
def _generate_pairwise_rmsd_matrix(self, models, purge=False):
    """Build an all-by-all pairwise rmsd matrix for ``models`` with gesamt.

    Parameters
    ----------
    models : list of str
        PDB paths; they are sorted and stored on ``self.index2pdb``.
    purge : bool
        When true, delete the gesamt input list and log afterwards.

    Raises
    ------
    RuntimeError
        If gesamt returns a non-zero exit code.

    Notes
    -----
    gesamt is run as ``gesamt -input-list inp_list.dat -sheaf-x`` where the
    list file holds one line per model, e.g.::

        1ADZ.pdb -s /1/A
        1ADZ.pdb -s /2/A
        1ADZ.pdb -s /3/A

    ``self.distance_matrix`` is created zero-filled and then populated by
    ``self._parse_gesamt_rmsd_log``.
    """
    # Index is just the order of the pdbs
    ordered = sorted(models)
    self.index2pdb = ordered

    # Create file with list of pdbs and model/chain
    list_file = 'gesamt_models.dat'
    with open(list_file, 'w') as handle:
        for pdb in ordered:
            handle.write("{0} -s /1/A \n".format(pdb))
        handle.write('\n')

    log_path = os.path.abspath('gesamt_archive.log')
    command = [
        self.executable,
        '-input-list', list_file,
        '-sheaf-x',
        '-nthreads={0}'.format(self.nproc),
    ]
    status = ample_util.run_command(command, log_path)
    if status != 0:
        raise RuntimeError("Error running gesamt - check logfile: {0}".format(log_path))

    # Square zero-filled matrix, one row/column per model
    count = len(ordered)
    self.distance_matrix = numpy.zeros([count, count])

    # Read in the rmsds calculated
    self._parse_gesamt_rmsd_log(log_path, count)

    if purge:
        for path in (list_file, log_path):
            os.unlink(path)
    return
def align_gesamt(models, gesamt_exe=None, work_dir=None):
    """Align single-chain ``models`` with gesamt and return the alignment file path.

    Parameters
    ----------
    models : list of str
        PDB paths; each must contain exactly one chain.
    gesamt_exe : str
        Path to the gesamt executable.
    work_dir : str, optional
        Directory to run in (created if missing); defaults to the current directory.

    Returns
    -------
    str
        Path to the alignment file (``gesamt.afasta`` in ``work_dir``, or
        whatever ``_gesamt_aln_windows_fix`` returns on Windows).

    Raises
    ------
    RuntimeError
        If the executable cannot be found, a model does not have exactly
        one chain, gesamt fails, or no alignment file is produced.
    """
    if not ample_util.is_exe(gesamt_exe):
        msg = "Cannot find gesamt executable: {0}".format(gesamt_exe)
        raise RuntimeError(msg)

    owd = os.getcwd()
    if not work_dir:
        work_dir = owd
    work_dir = os.path.abspath(work_dir)
    if not os.path.isdir(work_dir):
        os.mkdir(work_dir)
    os.chdir(work_dir)
    try:
        # Need to map chain name to pdb
        model2chain = {}
        for m in models:
            # list() so that indexing works on Python 3 dict views too
            chains = list(pdb_edit.sequence(m).keys())
            if len(chains) != 1:
                msg = "Model {0} does not contain a single chain, got: {1}".format(m, chains)
                raise RuntimeError(msg)
            model2chain[m] = chains[0]

        basename = 'gesamt'
        logfile = os.path.join(work_dir, 'gesamt.log')
        alignment_file = os.path.join(work_dir, basename + ".afasta")

        # Build up command-line
        cmd = [gesamt_exe]
        # We iterate through the models to make sure the order stays the same
        for m in models:
            cmd += [m, '-s', model2chain[m]]
        cmd += ['-o', '{0}.pdb'.format(basename), '-a', alignment_file]

        rtn = ample_util.run_command(cmd, logfile=logfile, directory=work_dir)
        if not rtn == 0:
            msg = "Error running gesamt. Check logfile: {0}".format(logfile)
            raise RuntimeError(msg)

        if not os.path.isfile(alignment_file):
            msg = "Gesamt did not generate an alignment file.\nPlease check the logfile: {0}".format(logfile)
            raise RuntimeError(msg)

        if sys.platform.startswith("win"):
            alignment_file = _gesamt_aln_windows_fix(alignment_file)
    finally:
        # Always go back to the original directory, including on failure
        os.chdir(owd)
    return alignment_file
def run_scripts_serial(job_scripts, nproc=None, monitor=None, early_terminate=None, check_success=None):
    """Run job scripts locally and report whether they succeeded.

    Parameters
    ----------
    job_scripts : list of str
        Paths of the scripts to run.
    nproc : int, optional
        Passed to ``JobServer.start`` when there is more than one script.
    monitor : optional
        Passed to ``JobServer.start``.
    early_terminate : optional
        Converted to ``bool`` and passed to ``JobServer.start``.
    check_success : optional
        Passed to ``JobServer.start``.

    Returns
    -------
    bool
        For several scripts, the value returned by ``JobServer.start``; for
        one script, whether its return code was 0; ``False`` for an empty list.

    Notes
    -----
    A single script is run from its own directory; the process working
    directory is changed to it and not changed back.
    """
    if not job_scripts:
        # Nothing to run, so nothing succeeded
        return False

    success = False
    if len(job_scripts) > 1:
        # Don't need early terminate - check_success if it exists states what's happening
        js = JobServer()
        js.setJobs(job_scripts)
        success = js.start(nproc=nproc,
                           early_terminate=bool(early_terminate),
                           check_success=check_success,
                           monitor=monitor)
    else:
        # Resolve before chdir: a bare file name has an empty dirname, and a
        # relative path would no longer point at the script after chdir
        script = os.path.abspath(job_scripts[0])
        name = os.path.splitext(os.path.basename(script))[0]
        logfile = "{0}.log".format(name)
        wdir = os.path.dirname(script)
        os.chdir(wdir)
        rtn = ample_util.run_command([script], logfile=logfile)
        if rtn == 0:
            success = True
    return success
def run_compare_model_list(self, nativePdb=None, models=None, logfile=None):
    """Run maxcluster comparing ``nativePdb`` against a list of models.

    Parameters
    ----------
    nativePdb : str
        Passed to maxcluster as ``-e``.
    models : list of str
        Model paths, written one per line to ``models.list`` in ``self.workdir``.
    logfile : str
        Log file for the maxcluster run.

    Raises
    ------
    RuntimeError
        If maxcluster returns a non-zero exit code.
    """
    # Generate the list of models
    pdblist = os.path.join(self.workdir, "models.list")
    with open(pdblist, 'w') as f:
        # Text mode already translates "\n" to the platform line ending;
        # joining on os.linesep would give "\r\r\n" on Windows
        f.write("\n".join(models))

    # Run Maxcluster
    cmd = [self.maxclusterExe, "-e", nativePdb, "-l", pdblist]
    retcode = ample_util.run_command(cmd, logfile=logfile, dolog=True)
    if retcode != 0:
        msg = "non-zero return code for maxcluster in runMaxcluster!"
        raise RuntimeError(msg)
    return
def del_column(file_name, column, overwrite=True):
    """Remove ``column`` from an mtz file with ``mtzutils`` and return the resulting path.

    Parameters
    ----------
    file_name : str
        Input MTZ, passed as ``hklin1``.
    column : str
        Column label placed on the ``EXCLUDE 1`` keyword line.
    overwrite : bool
        When true, the new file is moved over ``file_name``.

    Returns
    -------
    str
        ``file_name`` when ``overwrite`` is true, otherwise the path of the
        newly written file.

    Raises
    ------
    RuntimeError
        If mtzutils returns a non-zero exit code (the log is kept).
    """
    stripped_mtz = ample_util.filename_append(file_name, "d{0}".format(column))
    command = ["mtzutils", "hklin1", file_name, "hklout", stripped_mtz]
    keywords = "EXCLUDE 1 {0}".format(column)
    log_name = "mtzutils_{}.log".format(str(uuid.uuid1()))
    log_path = os.path.join(os.getcwd(), log_name)

    status = ample_util.run_command(command, stdin=keywords, logfile=log_path)
    if status != 0:
        raise RuntimeError(
            "Error running mtzutils. Check the logfile: {0}".format(log_path))
    os.unlink(log_path)

    if not overwrite:
        return stripped_mtz
    shutil.move(stripped_mtz, file_name)
    return file_name
def run_cphasematch(merged_mtz, f_sigf_labels, native_phase_labels, mr_phase_labels, resolution_bins=12, cleanup=True):
    """Run ``cphasematch`` on a merged mtz and return the parsed phase-error values.

    Parameters
    ----------
    merged_mtz : str
        MTZ file named on the ``mtzin`` line.
    f_sigf_labels : sequence of str
        The two labels used for ``colin-fo``.
    native_phase_labels : sequence of str
        The two labels used for ``colin-fc-1``.
    mr_phase_labels : sequence of str
        The two labels used for ``colin-fc-2``.
    resolution_bins : int
        Value for the ``resolution-bins`` line.
    cleanup : bool
        When true, delete the log after it has been parsed.

    Returns
    -------
    tuple
        ``(before_origin, after_origin, change_of_hand, origin_shift)`` as
        returned by ``parse_cphasematch_log``.

    Raises
    ------
    RuntimeError
        If cphasematch returns a non-zero exit code.
    """
    # One keyword per line, with a leading and a trailing newline
    keyword_template = "\n".join([
        "",
        "mtzin {merged_mtz}",
        "colin-fo /*/*/[{f_label},{sigf_label}]",
        "colin-fc-1 /*/*/[{native_phase_label1},{native_phase_label2}]",
        "colin-fc-2 /*/*/[{mr_phase_label1},{mr_phase_label2}]",
        "resolution-bins {resolution_bins}",
        "",
    ])
    stdin = keyword_template.format(
        merged_mtz=merged_mtz,
        f_label=f_sigf_labels[0],
        sigf_label=f_sigf_labels[1],
        native_phase_label1=native_phase_labels[0],
        native_phase_label2=native_phase_labels[1],
        mr_phase_label1=mr_phase_labels[0],
        mr_phase_label2=mr_phase_labels[1],
        resolution_bins=resolution_bins,
    )

    log_path = os.path.abspath("cphasematch.log")
    command = ['cphasematch', "-stdin"]
    status = ample_util.run_command(cmd=command, stdin=stdin, logfile=log_path)
    if status != 0:
        raise RuntimeError(
            "Error running command: {0}\nCheck logfile: {1}".format(" ".join(command), log_path))

    parsed = parse_cphasematch_log(log_path)
    before_origin, after_origin, change_of_hand, origin_shift = parsed
    if cleanup:
        os.unlink(log_path)
    return before_origin, after_origin, change_of_hand, origin_shift
def extract_model(inpdb, outpdb, modelID):
    """Extract modelID from inpdb into outpdb using pdbcur.

    Parameters
    ----------
    inpdb : str
        Input PDB, passed as ``xyzin``.
    outpdb : str
        Output PDB, passed as ``xyzout``; the log is ``outpdb + ".log"``.
    modelID : int
        Model number to keep; must be greater than 0.

    Raises
    ------
    AssertionError
        If ``modelID`` is not greater than 0.
    RuntimeError
        If pdbcur returns a non-zero exit code (the log is kept).
    """
    # Raised explicitly (same exception type as before) so the check is not
    # stripped when Python runs with -O
    if not modelID > 0:
        raise AssertionError("modelID must be > 0, got: {0}".format(modelID))

    logfile = outpdb + ".log"
    # Build the argv list directly so paths containing whitespace stay intact
    cmd = ["pdbcur", "xyzin", inpdb, "xyzout", outpdb]
    stdin = "lvmodel /{0}\n".format(modelID)
    retcode = ample_util.run_command(cmd=cmd, logfile=logfile, directory=os.getcwd(), dolog=False, stdin=stdin)
    if retcode != 0:
        # Actually format the message; previously the bound .format method was passed unformatted
        raise RuntimeError("Problem extracting model with cmd: {0}".format(" ".join(cmd)))
    os.unlink(logfile)
def add_sidechains(self, pdbin=None, pdbout=None, sequence=None, hydrogens=False, strip_oxt=False):
    """Add the specified sidechains to the pdb by running Scwrl.

    Parameters
    ----------
    pdbin : str
        Input PDB, passed as ``-i``.
    pdbout : str
        Final output PDB path.
    sequence : str, optional
        When given, written to ``sequence.file`` in ``self.workdir`` and
        passed as ``-s``.
    hydrogens : bool
        When false, ``-h`` is added so hydrogens are not output.
    strip_oxt : bool
        When true, Scwrl writes to an intermediate file and the OXT atoms
        are stripped from it to produce ``pdbout``.

    Returns
    -------
    str
        Absolute path of ``pdbout``.

    Raises
    ------
    RuntimeError
        If Scwrl returns a non-zero exit code.
    """
    _pdbout = pdbout
    if strip_oxt:
        _pdbout = pdbout + "_OXT"

    cmd = [self.scwrl_exe, "-i", pdbin, "-o", _pdbout]

    # Not needed by default
    if sequence is not None:
        sequenceFile = os.path.join(self.workdir, "sequence.file")
        with open(sequenceFile, 'w') as w:
            # Text mode already translates "\n" to the platform line ending;
            # writing os.linesep here would give "\r\r\n" on Windows
            w.write(sequence + "\n")
        cmd += ["-s", sequenceFile]

    # Don't output hydrogens
    if not hydrogens:
        cmd += ['-h']

    logfile = os.path.abspath("scwrl.log")
    retcode = ample_util.run_command(cmd, logfile=logfile)
    if retcode != 0:
        raise RuntimeError(
            "Error running Scwrl - please check the logfile: {0}".format(logfile))
    os.unlink(logfile)

    if strip_oxt:
        # Remove all OXT atoms
        pdb_edit.strip(_pdbout, pdbout, atom_types=['OXT'])
        os.unlink(_pdbout)

    return os.path.abspath(pdbout)
def run(self, refPdb=None, inPdb=None, outPdb=None, connectivityRadius=None, originHand=True, cleanup=False):
    """Run ``csymmatch`` on ``inPdb`` using ``refPdb`` as the reference.

    Parameters
    ----------
    refPdb : str
        Path passed to csymmatch as ``-pdbin-ref``.
    inPdb : str
        Path passed to csymmatch as ``-pdbin``.
    outPdb : str
        Path passed to csymmatch as ``-pdbout``; also the stem of the log file name.
    connectivityRadius : number or str, optional
        When truthy, passed as the ``-connectivity-radius`` value.
    originHand : bool
        When true, the ``-origin-hand`` flag is added.
    cleanup : bool
        When true, the log file is deleted after a successful run.

    Raises
    ------
    RuntimeError
        If csymmatch returns a non-zero exit code.
    """
    self._reset()
    # The uuid1 suffix gives every call its own log file name
    self.logfile = outPdb + "_{}.log".format(str(uuid.uuid1()))
    cmd = ['csymmatch', "-pdbin-ref", refPdb, "-pdbin", inPdb, "-pdbout", outPdb]
    if originHand:
        cmd += ["-origin-hand"]
    if connectivityRadius:
        # Every argv entry must be a string: a numeric radius would otherwise
        # break both the command execution and the " ".join() below
        cmd += ["-connectivity-radius", str(connectivityRadius)]
    retcode = ample_util.run_command(cmd=cmd, logfile=self.logfile, dolog=False)
    if retcode != 0:
        raise RuntimeError("Error running command: {0}".format(" ".join(cmd)))
    if cleanup:
        os.unlink(self.logfile)
def runCompareDirectory(self, nativePdb=None, modelsDirectory=None, logfile=None):
    """Run maxcluster comparing ``nativePdb`` against every ``*.pdb`` in a directory.

    Parameters
    ----------
    nativePdb : str
        Passed to maxcluster as ``-e``.
    modelsDirectory : str
        Directory globbed for ``*.pdb`` files.
    logfile : str
        Log file for the maxcluster run.

    Raises
    ------
    RuntimeError
        If the directory contains no pdb files or maxcluster returns a
        non-zero exit code.
    """
    # Look for the models first so that no empty list file is left behind on failure
    pdbs = glob.glob(os.path.join(modelsDirectory, '*.pdb'))
    if not pdbs:
        raise RuntimeError(
            "Could not find any pdb files in directory: {0}".format(modelsDirectory))

    # Generate the list of models
    pdblist = os.path.join(self.workdir, "models.list")
    with open(pdblist, 'w') as f:
        # Text mode already translates "\n" to the platform line ending;
        # joining on os.linesep would give "\r\r\n" on Windows
        f.write("\n".join(pdbs))

    cmd = [self.maxclusterExe, "-e", nativePdb, "-l", pdblist]
    retcode = ample_util.run_command(cmd, logfile=logfile, dolog=True)
    if retcode != 0:
        raise RuntimeError(
            "non-zero return code for maxcluster in runMaxcluster!")
def runNcont(self, pdbin=None, sourceChains=None, targetChains=None, maxDist=1.5, allAtom=False):
    """Run ``ncont`` on ``pdbin`` to list contacts between two sets of chains.

    Parameters
    ----------
    pdbin : str
        PDB file passed to ncont as ``xyzin``; also the stem of the log name.
    sourceChains, targetChains : iterable of str
        Chain identifiers, comma-joined into the ``source`` / ``target`` selections.
    maxDist : float
        Value written on the ``maxdist`` keyword line.
    allAtom : bool
        When true select all atoms (``*``), otherwise only ``CA`` atoms.

    Raises
    ------
    RuntimeError
        If ncont returns a non-zero exit code.

    The log path is stored on ``self.ncontLog``.
    """
    if allAtom:
        self.ncontLog = pdbin + ".ncont_aa.log"
    else:
        self.ncontLog = pdbin + ".ncont_rio.log"
    cmd = ["ncont", "xyzin", pdbin]

    # Build up stdin
    stdin = ""
    # Need to use list of chains from Native as can't work out negate operator for ncont
    if allAtom:
        stdin += "source {0}//*\n".format(",".join(sourceChains))
        stdin += "target {0}//*\n".format(",".join(targetChains))
    else:
        stdin += "source {0}//CA\n".format(",".join(sourceChains))
        stdin += "target {0}//CA\n".format(",".join(targetChains))
    stdin += "maxdist {0}\n".format(maxDist)
    stdin += "cells 2\n"
    stdin += "sort target inc\n"

    retcode = ample_util.run_command(cmd=cmd, logfile=self.ncontLog, directory=os.getcwd(), dolog=True, stdin=stdin)
    if retcode != 0:
        # Call form of raise: the "raise X, msg" statement is a syntax error on Python 3
        raise RuntimeError("Error running ncont command: {0}\nCheck log: {1}".format(cmd, self.ncontLog))
    return
def add_rfree(file_name, directory=None, overwrite=True):
    """Run ``uniqueify`` on an mtz file to generate the RFREE data column.

    Parameters
    ----------
    file_name : str
        Input MTZ file.
    directory : str, optional
        Passed to ``ample_util.filename_append`` when naming the output file.
    overwrite : bool
        When true, the new file is moved over ``file_name``.

    Returns
    -------
    str
        ``file_name`` when ``overwrite`` is true, otherwise the path of the
        newly written file.

    Raises
    ------
    RuntimeError
        If uniqueify returns a non-zero exit code (the log is kept).
    """
    unique_mtz = ample_util.filename_append(file_name, "uniqueify", directory=directory)
    command = ['uniqueify', file_name, unique_mtz]
    log_name = "uniqueify_{}.log".format(str(uuid.uuid1()))
    log_path = os.path.join(os.getcwd(), log_name)

    status = ample_util.run_command(command, logfile=log_path)
    if status != 0:
        raise RuntimeError(
            "Error running command: {0}. Check the logfile: {1}".format(" ".join(command), log_path))
    os.unlink(log_path)

    if not overwrite:
        return unique_mtz
    shutil.move(unique_mtz, file_name)
    return file_name
def run_cphasematch(merged_mtz, f_sigf_labels, native_phase_labels, mr_phase_labels, resolution_bins=12, cleanup=True):
    """Run ``cphasematch`` on a merged mtz and return the parsed phase-error values.

    Parameters
    ----------
    merged_mtz : str
        MTZ file named on the ``mtzin`` line.
    f_sigf_labels : sequence of str
        The two labels used for ``colin-fo``.
    native_phase_labels : sequence of str
        The two labels used for ``colin-fc-1``.
    mr_phase_labels : sequence of str
        The two labels used for ``colin-fc-2``.
    resolution_bins : int
        Value for the ``resolution-bins`` line.
    cleanup : bool
        When true, delete the log after it has been parsed.

    Returns
    -------
    tuple
        ``(before_origin, after_origin, change_of_hand, origin_shift)`` as
        returned by ``parse_cphasematch_log``.

    Raises
    ------
    RuntimeError
        If cphasematch returns a non-zero exit code.
    """
    # One keyword per line, with a leading and a trailing newline
    keyword_template = "\n".join([
        "",
        "mtzin {merged_mtz}",
        "colin-fo /*/*/[{f_label},{sigf_label}]",
        "colin-fc-1 /*/*/[{native_phase_label1},{native_phase_label2}]",
        "colin-fc-2 /*/*/[{mr_phase_label1},{mr_phase_label2}]",
        "resolution-bins {resolution_bins}",
        "",
    ])
    stdin = keyword_template.format(
        merged_mtz=merged_mtz,
        f_label=f_sigf_labels[0],
        sigf_label=f_sigf_labels[1],
        native_phase_label1=native_phase_labels[0],
        native_phase_label2=native_phase_labels[1],
        mr_phase_label1=mr_phase_labels[0],
        mr_phase_label2=mr_phase_labels[1],
        resolution_bins=resolution_bins,
    )

    log_path = os.path.abspath("cphasematch.log")
    command = ['cphasematch', "-stdin"]
    status = ample_util.run_command(cmd=command, stdin=stdin, logfile=log_path)
    if status != 0:
        raise RuntimeError("Error running command: {0}\nCheck logfile: {1}".format(" ".join(command), log_path))

    parsed = parse_cphasematch_log(log_path)
    before_origin, after_origin, change_of_hand, origin_shift = parsed
    if cleanup:
        os.unlink(log_path)
    return before_origin, after_origin, change_of_hand, origin_shift
def generateMap(mtz, pdb, FP='FP', SIGFP='SIGFP', FREE='FREE', directory=None):
    """Generate a map from an mtz file and a pdb by running ``refmac5`` for zero cycles.

    Parameters
    ----------
    mtz : str
        Existing MTZ file, passed to refmac5 as ``HKLIN``.
    pdb : str
        Existing PDB file, passed to refmac5 as ``XYZIN``.
    FP, SIGFP, FREE : str
        Column labels substituted into the ``LABIN`` keyword line.
    directory : str, optional
        Directory for the outputs and log; defaults to the current directory.

    Returns
    -------
    str
        Absolute path of the file written as ``HKLOUT``.

    Raises
    ------
    AssertionError
        If either input file is missing or refmac5 returns non-zero.
    """
    # Raised explicitly (same exception type as before) so that the checks
    # are not stripped when Python runs with -O
    if not (os.path.isfile(mtz) and os.path.isfile(pdb)):
        raise AssertionError("Cannot find files: {0} {1}".format(mtz, pdb))

    if not directory:
        directory = os.getcwd()

    mapFile = ample_util.filename_append(filename=mtz, astr="map", directory=directory)
    mapFile = os.path.abspath(mapFile)
    mapPdb = ample_util.filename_append(filename=pdb, astr="map", directory=directory)

    cmd = ["refmac5", "HKLIN", mtz, "HKLOUT", mapFile, "XYZIN", pdb, "XYZOUT", mapPdb]

    # FIX FOR DIFFERENT FP etc.
    # One refmac keyword per line
    stdin = (
        "RIDG DIST SIGM 0.02\n"
        "LABIN FP={0} SIGFP={1} FREE={2}\n"
        "MAKE HYDR N\n"
        "WEIGHT MATRIX 0.01\n"
        "NCYC 0\n"
        "END\n"
    ).format(FP, SIGFP, FREE)

    logfile = os.path.join(directory, "generateMap.log")
    ret = ample_util.run_command(cmd=cmd, logfile=logfile, dolog=True, stdin=stdin)
    if ret != 0:
        raise AssertionError("generateMap refmac failed-check log: {0}".format(logfile))
    return mapFile
def translate(inpdb=None, outpdb=None, ftranslate=None):
    """Translate a pdb by a fractional-coordinate vector using pdbcur.

    Parameters
    ----------
    inpdb : str
        Input PDB, passed as ``xyzin``.
    outpdb : str
        Output PDB, passed as ``xyzout``; the log is ``outpdb + ".log"``.
    ftranslate : sequence of float
        Vector of fractional coordinates to shift by (three values).

    Raises
    ------
    RuntimeError
        If pdbcur returns a non-zero exit code (the log is kept).
    """
    logfile = outpdb + ".log"
    # Build the argv list directly so paths containing whitespace stay intact
    cmd = ["pdbcur", "xyzin", inpdb, "xyzout", outpdb]

    # Build up stdin
    stdin = 'translate * frac {0:F} {1:F} {2:F}'.format(
        ftranslate[0], ftranslate[1], ftranslate[2])
    retcode = ample_util.run_command(cmd=cmd, logfile=logfile, directory=os.getcwd(), dolog=False, stdin=stdin)
    if retcode != 0:
        raise RuntimeError("Error translating PDB")
    # remove temporary files
    os.unlink(logfile)
def to_hkl(mtz_file, hkl_file=None, directory=None, F=None, SIGF=None, FREE=None):
    """Convert an MTZ file to SHELX HKL format with ``mtz2various``.

    Parameters
    ----------
    mtz_file : str
        Input MTZ, passed as ``HKLIN``.
    hkl_file : str, optional
        Output path; defaults to the MTZ base name with ``.hkl`` in ``directory``.
    directory : str, optional
        Directory for the default output name; defaults to the current directory.
    F, SIGF, FREE : str, optional
        Column labels; if any is ``None`` all three are taken from
        ``get_labels(mtz_file)``.

    Returns
    -------
    str
        Path of the HKL file.

    Raises
    ------
    RuntimeError
        If mtz2various returns a non-zero exit code (the log is kept).
    """
    if directory is None:
        directory = os.getcwd()
    if hkl_file is None:
        stem, _ = os.path.splitext(os.path.basename(mtz_file))
        hkl_file = os.path.join(directory, stem + ".hkl")
    if None in (F, SIGF, FREE) and (F is None or SIGF is None or FREE is None):
        F, SIGF, FREE = get_labels(mtz_file)

    command = ['mtz2various', 'HKLIN', mtz_file, 'HKLOUT', hkl_file]
    log_name = "mtz2various_{}.log".format(str(uuid.uuid1()))
    # One mtz2various keyword per line
    keywords = "\n".join([
        "LABIN FP={0} SIGFP={1} FREE={2}",
        "OUTPUT SHELX",
        "FSQUARED",
        "END",
    ]).format(F, SIGF, FREE)

    status = ample_util.run_command(cmd=command, logfile=log_name, directory=None, dolog=False, stdin=keywords)
    if status != 0:
        raise RuntimeError(
            "Error converting {0} to HKL format - see log: {1}".format(mtz_file, log_name))
    os.unlink(log_name)
    return hkl_file
def backbone(inpath=None, outpath=None):
    """Only output backbone atoms, using pdbcur.

    Parameters
    ----------
    inpath : str
        Input PDB, passed as ``xyzin`` (as a path relative to the current directory).
    outpath : str
        Output PDB, passed as ``xyzout`` (relative likewise); the log is
        the relative output path plus ``.log``.

    Raises
    ------
    RuntimeError
        If pdbcur returns a non-zero exit code (the log is kept).
    """
    # pdbcur segfaults with long pathnames
    inpath = os.path.relpath(inpath)
    outpath = os.path.relpath(outpath)

    logfile = outpath + ".log"
    # Build the argv list directly so paths containing whitespace stay intact
    cmd = ["pdbcur", "xyzin", inpath, "xyzout", outpath]
    stdin = 'lvatom "N,CA,C,O,CB[N,C,O]"'
    retcode = ample_util.run_command(cmd=cmd, logfile=logfile, directory=os.getcwd(), dolog=False, stdin=stdin)
    if retcode != 0:
        raise RuntimeError(
            "Error stripping PDB to backbone atoms. See log:{0}".format(logfile))
    os.unlink(logfile)
def generate_distance_matrix(self, pdb_list):
    """Run maxcluster to generate the distance matrix.

    Parameters
    ----------
    pdb_list : list of str
        Paths of the PDB files to compare.

    Sets ``self.distance_matrix`` (square numpy array, initially 0.0
    everywhere) and ``self.index2pdb`` (model paths by 0-based index, 0
    where a model never appears in the log).

    Raises
    ------
    RuntimeError
        If ``pdb_list`` is empty, maxcluster returns a non-zero exit code,
        or a model-comparison log line cannot be parsed.
    """
    num_models = len(pdb_list)
    if not num_models:
        msg = "generate_distance_matrix got empty pdb_list!"
        logging.critical(msg)
        raise RuntimeError(msg)

    self.index2pdb = [0] * num_models

    # Maxcluster arguments
    # -l [file]   File containing a list of PDB model fragments
    # -L [n]      Log level (default is 4 for single MaxSub, 1 for lists)
    # -d [f]      The distance cut-off for search (default auto-calibrate)
    # -bb         Perform RMSD fit using backbone atoms
    # -C [n]      Cluster method: 0 - No clustering
    # -rmsd ???

    # Create the list of files for maxcluster
    fname = os.path.join(os.getcwd(), FILE_LIST_NAME)
    with open(fname, 'w') as f:
        f.write("\n".join(pdb_list) + "\n")

    log_name = os.path.abspath("maxcluster.log")
    cmd = [self.executable, "-l", fname, "-L", "4", "-rmsd", "-d", "1000", "-bb", "-C0"]
    retcode = ample_util.run_command(cmd, logfile=log_name)
    if retcode != 0:
        msg = "non-zero return code for maxcluster in generate_distance_matrix!\nSee logfile: {0}".format(log_name)
        logging.critical(msg)
        raise RuntimeError(msg)

    # Create a square distance_matrix no_models in size filled with 0.0
    parity = 0.0
    self.distance_matrix = numpy.full([num_models, num_models], parity)

    # jmht Save output for parsing - might make more sense to use one of the dedicated maxcluster output formats
    # Raw strings: same patterns as before, without invalid-escape warnings
    prefix = re.compile(r'INFO \: Model')
    # Groups: 1 model 1 index, 2 path to model 1 without .pdb suffix,
    #         3 model 2 index, 4 path to model 2 without .pdb suffix,
    #         5 distance metric
    comparison = re.compile(
        r'INFO \: Model\s*(\d*)\s*(.*)\.pdb\s*vs\. Model\s*(\d*)\s*(.*)\.pdb\s*=\s*(\d*\.\d*)')
    with open(log_name, 'r') as max_log:
        for line in max_log:
            if not prefix.match(line):
                continue
            found = comparison.search(line)
            if found is None:
                raise RuntimeError("Cannot parse maxcluster log line: {0}".format(line.rstrip()))
            idx1 = int(found.group(1)) - 1
            idx2 = int(found.group(3)) - 1
            self.distance_matrix[idx1][idx2] = float(found.group(5))
            # Direct assignment avoids scanning the whole list for every log line
            self.index2pdb[idx1] = found.group(2) + '.pdb'
            self.index2pdb[idx2] = found.group(4) + '.pdb'

    # Copy in other half of matrix - we use a full matrix as it's easier to scan for clusters
    for x in range(len(self.distance_matrix)):
        for y in range(len(self.distance_matrix)):
            self.distance_matrix[y][x] = self.distance_matrix[x][y]
    return
def cluster(self,
            models=None,
            num_clusters=None,
            nproc=1,
            score_type="rmsd",
            cluster_method="kmeans",
            work_dir=None,
            fpc_exe=None,
            max_cluster_size=200,
            benchmark=False):
    """Cluster ``models`` with fast_protein_cluster and return the clusters.

    The program is run inside ``work_dir`` (created if missing); the original
    working directory is restored before returning, also when an error is
    raised.

    Parameters
    ----------
    models : list
        Paths of the pdb files to cluster. Each must be an existing file; the
        check is made after changing into ``work_dir``.
    num_clusters : int
        Number of clusters to return. Must not exceed 5, the number of
        clusters this function expects fast_protein_cluster to write.
    nproc : int
        Passed as ``--nthreads`` when greater than 1.
    score_type : str
        ``"rmsd"`` or ``"tm"``.
    cluster_method : str
        ``"kmeans"`` or ``"hcomplete"``.
    work_dir : str
        Directory to run in; the current directory is used when not given.
    fpc_exe : str
        Path to the fast_protein_cluster executable.
    max_cluster_size : int
        Clusters with more members than this are truncated to this size.
    benchmark : bool
        Use a constant seed (``-S 1``) so that runs are reproducible.

    Returns
    -------
    list
        ``num_clusters`` ``Cluster`` objects, in the order the clusters are
        listed in the stats file, with ``method``, ``score_type``, ``index``
        (1-based), ``centroid``, ``num_clusters`` and ``models`` set.

    Raises
    ------
    RuntimeError
        On invalid arguments, missing models or executable, a non-zero return
        code, or missing/unexpected output files.
    """
    # FPC default is 5 clusters - we just run with this for the time being
    FPC_NUM_CLUSTERS = 5
    if num_clusters is None or num_clusters > FPC_NUM_CLUSTERS:
        msg = "Cannot work with more than {0} clusters, got: {1}.".format(FPC_NUM_CLUSTERS, num_clusters)
        raise RuntimeError(msg)

    owd = os.getcwd()
    if not work_dir:
        work_dir = owd
    if not os.path.isdir(work_dir):
        os.mkdir(work_dir)
    os.chdir(work_dir)
    try:
        if not models or not all(os.path.isfile(m) for m in models):
            msg = "Missing models: {0}".format(models)
            raise RuntimeError(msg)

        # Create list of files
        flist = 'files.list'
        with open(flist, 'w') as f:
            for m in models:
                f.write("{0}\n".format(os.path.abspath(m)))

        if not fpc_exe or not os.path.isfile(fpc_exe):
            msg = "Cannot find fast_protein_cluster executable: {0}".format(fpc_exe)
            raise RuntimeError(msg)

        # Build up the command-line
        cmd = [fpc_exe]
        if score_type == "rmsd":
            cmd += ['--rmsd']
        elif score_type == "tm":
            cmd += ['--tmscore']
        else:
            msg = "Unrecognised score_type: {0}".format(score_type)
            raise RuntimeError(msg)

        if cluster_method == "kmeans":
            cmd += ['--cluster_kmeans']
        elif cluster_method == "hcomplete":
            cmd += ['--cluster_hcomplete']
        else:
            msg = "Unrecognised cluster_method: {0}".format(cluster_method)
            raise RuntimeError(msg)

        if nproc > 1:
            cmd += ['--nthreads', str(nproc)]

        # Always save the distance matrix
        cmd += ['--write_text_matrix', 'matrix.txt']

        # For benchmark we use a constant seed to make sure we get the same results
        if benchmark:
            cmd += ['-S', '1']

        # Finally the list of files
        cmd += ['-i', flist]

        logfile = os.path.abspath("fast_protein_cluster.log")
        retcode = ample_util.run_command(cmd, logfile=logfile)
        if retcode != 0:
            msg = "non-zero return code for fast_protein_cluster in cluster!\nCheck logfile:{0}".format(logfile)
            raise RuntimeError(msg)

        cluster_list = 'cluster_output.clusters'
        cluster_stats = 'cluster_output.cluster.stats'
        if not os.path.isfile(cluster_list) or not os.path.isfile(cluster_stats):
            msg = "Cannot find files: {0} and {1}".format(cluster_list, cluster_stats)
            raise RuntimeError(msg)

        # Check stats and get centroids
        csizes = []
        centroids = []
        with open(cluster_stats) as f:
            for line in f:
                if line.startswith("Cluster:"):
                    fields = line.split()
                    csizes.append(int(fields[4]))
                    centroids.append(fields[7])

        if len(csizes) != FPC_NUM_CLUSTERS:
            msg = "Found {0} clusters in {1} but was expecting {2}".format(
                len(csizes), cluster_stats, FPC_NUM_CLUSTERS)
            raise RuntimeError(msg)

        # Read in the clusters: each line is "<model> <cluster index>"
        all_clusters = [[] for _ in range(FPC_NUM_CLUSTERS)]
        with open(cluster_list) as f:
            for line in f:
                fields = line.split()
                if not fields:
                    continue  # tolerate blank lines
                all_clusters[int(fields[1])].append(fields[0])

        # NOTE: the sizes in the stats file are deliberately not checked against
        # the cluster membership, as there seems to be a bug in
        # fast_protein_cluster with the printing of sizes.

        # Make sure no cluster holds more than max_cluster_size models
        all_clusters = [c[:max_cluster_size] for c in all_clusters]

        # Create the data - we loop through the number of clusters specified by the user
        clusters = []
        for i in range(num_clusters):
            cluster = Cluster()
            cluster.method = cluster_method
            cluster.score_type = score_type
            cluster.index = i + 1
            cluster.centroid = centroids[i]
            cluster.num_clusters = num_clusters
            cluster.models = all_clusters[i]
            # Collect the result so that the caller actually receives it
            clusters.append(cluster)
    finally:
        # Always go back to the original directory, even on failure
        os.chdir(owd)
    return clusters
def on_cluster():
    """Return True if the ``qstat`` command can be run successfully.

    ``qstat`` is run through ``ample_util.run_command``; a zero return code
    is taken to mean we are on a cluster. Any error raised while trying to
    run the command is treated as "not on a cluster".

    Returns
    -------
    bool
        True if ``qstat`` ran and returned 0, False otherwise.
    """
    try:
        retcode = ample_util.run_command(["qstat"])
    except Exception:
        # Deliberate best-effort probe: failing to run qstat is an answer, not
        # an error. KeyboardInterrupt/SystemExit are no longer swallowed.
        retcode = -1
    return retcode == 0
def _generate_distance_matrix_generic(self, models, purge=True, purge_all=False, metric='qscore'): # Make sure all the files are in the same directory otherwise we wont' work mdir = os.path.dirname(models[0]) if not all([ os.path.dirname(p) == mdir for p in models ]): raise RuntimeError("All pdb files are not in the same directory!") models = sorted(models) self.index2pdb = models nmodels = len(models) # Create list of pdb files fname = os.path.join(os.getcwd(), FILE_LIST_NAME) with open(fname, 'w') as f: f.write("\n".join(models) + "\n") # Make the archive logger.debug("Generating gesamt archive from models in directory %s", mdir) garchive = 'gesamt.archive' if not os.path.isdir(garchive): os.mkdir(garchive) logfile = os.path.abspath('gesamt_archive.log') cmd = [self.executable, '--make-archive', garchive, '-pdb', mdir] #cmd += [ '-nthreads=auto' ] cmd += ['-nthreads={0}'.format(self.nproc)] # HACK FOR DYLD!!!! env = None #env = {'DYLD_LIBRARY_PATH' : '/opt/ccp4-devtools/install/lib'} rtn = ample_util.run_command(cmd, logfile,env = env) if rtn != 0: raise RuntimeError("Error running gesamt - check logfile: {0}".format(logfile)) if purge_all: os.unlink(logfile) # Now loop through each file creating the matrix if metric == 'rmsd': parity = 0.0 elif metric == 'qscore': parity = 1 else: raise RuntimeError("Unrecognised metric: {0}".format(metric)) #m = [[parity for _ in range(nmodels)] for _ in range(nmodels)] m = numpy.full([nmodels, nmodels], parity, dtype=numpy.float) for i, model in enumerate(models): mname = os.path.basename(model) gesamt_out = '{0}_gesamt.out'.format(mname) logfile = '{0}_gesamt.log'.format(mname) cmd = [self.executable, model, '-archive', garchive, '-o', gesamt_out] cmd += ['-nthreads={0}'.format(self.nproc)] rtn = ample_util.run_command(cmd, logfile) if rtn != 0: raise RuntimeError("Error running gesamt!") else: if purge: os.unlink(logfile) gdata = self._parse_gesamt_out(gesamt_out) assert gdata[0].file_name == mname, gdata[0].file_name + " " 
+ mname score_dict = {g.file_name: (g.rmsd, g.q_score) for g in gdata} for j in range(i + 1, nmodels): # Try and get the rmsd and qscore for this model. If it's missing we assume the model was # too divergent for gesamt to find it and we set the rmsd and qscore to fixed values model2 = os.path.basename(models[j]) try: rmsd, qscore = score_dict[model2] except KeyError: rmsd = RMSD_MAX qscore = QSCORE_MIN if metric == 'rmsd': score = rmsd elif metric == 'qscore': score = qscore else: raise RuntimeError("Unrecognised metric: {0}".format(metric)) m[i, j] = score if purge_all: os.unlink(gesamt_out) # Copy upper half of matrix to lower i_lower = numpy.tril_indices(nmodels, -1) m[i_lower] = m.T[i_lower] # make the matrix symmetric self.distance_matrix = m # Remove the gesamt archive if purge: shutil.rmtree(garchive) # Write out the matrix in a form spicker can use self.dump_pdb_matrix(SCORE_MATRIX_NAME) return
def cluster(
        self,
        models=None,
        num_clusters=None,
        nproc=1,
        score_type="rmsd",
        cluster_method="kmeans",
        work_dir=None,
        fpc_exe=None,
        max_cluster_size=200,
        benchmark=False,
):
    """Cluster ``models`` with fast_protein_cluster and return the clusters.

    The program is run inside ``work_dir`` (created if missing); the original
    working directory is restored before returning, also when an error is
    raised.

    Parameters
    ----------
    models : list
        Paths of the pdb files to cluster. Each must be an existing file; the
        check is made after changing into ``work_dir``.
    num_clusters : int
        Number of clusters to return. Must not exceed 5, the number of
        clusters this function expects fast_protein_cluster to write.
    nproc : int
        Passed as ``--nthreads`` when greater than 1.
    score_type : str
        ``"rmsd"`` or ``"tm"``.
    cluster_method : str
        ``"kmeans"`` or ``"hcomplete"``.
    work_dir : str
        Directory to run in; the current directory is used when not given.
    fpc_exe : str
        Path to the fast_protein_cluster executable.
    max_cluster_size : int
        Clusters with more members than this are truncated to this size.
    benchmark : bool
        Use a constant seed (``-S 1``) so that runs are reproducible.

    Returns
    -------
    list
        ``num_clusters`` ``Cluster`` objects, in the order the clusters are
        listed in the stats file, with ``method``, ``score_type``, ``index``
        (1-based), ``centroid``, ``num_clusters`` and ``models`` set.

    Raises
    ------
    RuntimeError
        On invalid arguments, missing models or executable, a non-zero return
        code, or missing/unexpected output files.
    """
    # FPC default is 5 clusters - we just run with this for the time being
    FPC_NUM_CLUSTERS = 5
    if num_clusters is None or num_clusters > FPC_NUM_CLUSTERS:
        msg = "Cannot work with more than {0} clusters, got: {1}.".format(
            FPC_NUM_CLUSTERS, num_clusters)
        raise RuntimeError(msg)

    owd = os.getcwd()
    if not work_dir:
        work_dir = owd
    if not os.path.isdir(work_dir):
        os.mkdir(work_dir)
    os.chdir(work_dir)
    try:
        if not models or not all(os.path.isfile(m) for m in models):
            msg = "Missing models: {0}".format(models)
            raise RuntimeError(msg)

        # Create list of files
        flist = 'files.list'
        with open(flist, 'w') as f:
            for m in models:
                f.write("{0}\n".format(os.path.abspath(m)))

        if not fpc_exe or not os.path.isfile(fpc_exe):
            msg = "Cannot find fast_protein_cluster executable: {0}".format(
                fpc_exe)
            raise RuntimeError(msg)

        # Build up the command-line
        cmd = [fpc_exe]
        if score_type == "rmsd":
            cmd += ['--rmsd']
        elif score_type == "tm":
            cmd += ['--tmscore']
        else:
            msg = "Unrecognised score_type: {0}".format(score_type)
            raise RuntimeError(msg)

        if cluster_method == "kmeans":
            cmd += ['--cluster_kmeans']
        elif cluster_method == "hcomplete":
            cmd += ['--cluster_hcomplete']
        else:
            msg = "Unrecognised cluster_method: {0}".format(cluster_method)
            raise RuntimeError(msg)

        if nproc > 1:
            cmd += ['--nthreads', str(nproc)]

        # Always save the distance matrix
        cmd += ['--write_text_matrix', 'matrix.txt']

        # For benchmark we use a constant seed to make sure we get the same results
        if benchmark:
            cmd += ['-S', '1']

        # Finally the list of files
        cmd += ['-i', flist]

        logfile = os.path.abspath("fast_protein_cluster.log")
        retcode = ample_util.run_command(cmd, logfile=logfile)
        if retcode != 0:
            msg = "non-zero return code for fast_protein_cluster in cluster!\nCheck logfile:{0}".format(
                logfile)
            raise RuntimeError(msg)

        cluster_list = 'cluster_output.clusters'
        cluster_stats = 'cluster_output.cluster.stats'
        if not os.path.isfile(cluster_list) or not os.path.isfile(
                cluster_stats):
            msg = "Cannot find files: {0} and {1}".format(
                cluster_list, cluster_stats)
            raise RuntimeError(msg)

        # Check stats and get centroids
        csizes = []
        centroids = []
        with open(cluster_stats) as f:
            for line in f:
                if line.startswith("Cluster:"):
                    fields = line.split()
                    csizes.append(int(fields[4]))
                    centroids.append(fields[7])

        if len(csizes) != FPC_NUM_CLUSTERS:
            msg = "Found {0} clusters in {1} but was expecting {2}".format(
                len(csizes), cluster_stats, FPC_NUM_CLUSTERS)
            raise RuntimeError(msg)

        # Read in the clusters: each line is "<model> <cluster index>"
        all_clusters = [[] for _ in range(FPC_NUM_CLUSTERS)]
        with open(cluster_list) as f:
            for line in f:
                fields = line.split()
                if not fields:
                    continue  # tolerate blank lines
                all_clusters[int(fields[1])].append(fields[0])

        # NOTE: the sizes in the stats file are deliberately not checked against
        # the cluster membership, as there seems to be a bug in
        # fast_protein_cluster with the printing of sizes.

        # Make sure no cluster holds more than max_cluster_size models
        all_clusters = [c[:max_cluster_size] for c in all_clusters]

        # Create the data - we loop through the number of clusters specified by the user
        clusters = []
        for i in range(num_clusters):
            cluster = Cluster()
            cluster.method = cluster_method
            cluster.score_type = score_type
            cluster.index = i + 1
            cluster.centroid = centroids[i]
            cluster.num_clusters = num_clusters
            cluster.models = all_clusters[i]
            # Collect the result so that the caller actually receives it
            clusters.append(cluster)
    finally:
        # Always go back to the original directory, even on failure
        os.chdir(owd)
    return clusters
def _cluster(self, models, run_dir=None, score_type='rmsd', score_matrix=None, nproc=1):
    """Run spicker to cluster the models.

    Spicker is run inside ``self.run_dir``; the parsed results are stored in
    ``self.results``. The original working directory is restored before
    returning, also when an error is raised.

    Parameters
    ----------
    models : list
        Models handed to ``self.create_input_files``.
    run_dir : str
        Directory to run in. When not given, an already set ``self.run_dir``
        is used, falling back to ``<cwd>/spicker``. Created if missing.
    score_type : str
        Score type; stored as ``self.score_type`` and passed on to
        ``self.create_input_files``.
    score_matrix
        Passed on to ``self.create_input_files``.
    nproc : int
        Value for the ``OMP_NUM_THREADS`` environment variable given to spicker.

    Raises
    ------
    RuntimeError
        If spicker returns a non-zero exit code.
    """
    owd = os.getcwd()
    if run_dir:
        self.run_dir = os.path.abspath(run_dir)
    if not self.run_dir:
        self.run_dir = os.path.join(owd, 'spicker')
    if not os.path.isdir(self.run_dir):
        os.mkdir(self.run_dir)
    os.chdir(self.run_dir)
    try:
        logger.debug("Running spicker with score_type %s in directory: %s", score_type, self.run_dir)
        logger.debug("Using executable: %s on %s processors", self.spicker_exe, nproc)

        self.score_type = score_type
        self.create_input_files(models, score_type=score_type, score_matrix=score_matrix)

        # We need special care if we are running with tm scores as we will be using the OPENMP
        # version of spicker which requires increasing the stack size on linux and setting the
        # OMP_NUM_THREADS environment variable on all platforms
        # The stack size on 64-bit linux seems to be 15Mb, so I guess asking for 50 seems reasonable
        # I'm assuming that the limit is in bytes and specified by an integer so 50Mb -> 50000000
        preexec_fn = None
        env = {'OMP_NUM_THREADS': str(nproc)}
        if sys.platform.lower().startswith('linux'):

            def set_stack():
                import resource
                stack_bytes = 50000000  # 50Mb
                resource.setrlimit(resource.RLIMIT_STACK, (stack_bytes, stack_bytes))

            preexec_fn = set_stack

        logfile = os.path.abspath("spicker.log")
        rtn = ample_util.run_command([self.spicker_exe], logfile=logfile, env=env, preexec_fn=preexec_fn)
        if rtn != 0:
            raise RuntimeError("Error running spicker, check logfile: {0}".format(logfile))

        # Read the log and generate the results
        self.results = self.process_log()
    finally:
        # Always go back to where we started - including on failure
        os.chdir(owd)
    return
def merge_mtz(mtz1_path, mtz1_labels, mtz2_path, mtz2_labels):
    """Create MTZ file with columns from the given mtz files and mtz labels in each file.

    ``cad`` is run with ``LABIN`` lines selecting the requested labels from
    each input file. A label that clashes with one already taken from an
    earlier column is renamed by appending its file number (e.g. ``FP`` from
    the second file becomes ``FP2``) via a ``LABOUT`` line. The merged file is
    written to ``<name1>_<name2>.mtz`` in the current directory and the cad
    log file is deleted on success.

    Parameters
    ----------
    mtz1_path : str
        Path to the first mtz file.
    mtz1_labels : list
        Column labels to take from the first file; must not contain duplicates.
    mtz2_path : str
        Path to the second mtz file.
    mtz2_labels : list
        Column labels to take from the second file; must not contain duplicates.

    Returns
    -------
    tuple
        ``(merged_mtz, labels)``: the absolute path of the merged file and the
        list of column labels in it (after any renaming).

    Raises
    ------
    AssertionError
        If a label list contains duplicates or a renamed label clashes with
        another label.
    RuntimeError
        If cad returns a non-zero exit code.
    """
    # Can't have any duplicates in file labels
    assert len(mtz1_labels) == len(set(mtz1_labels)), "Duplicate labels in mtz1_labels"
    assert len(mtz2_labels) == len(set(mtz2_labels)), "Duplicate labels in mtz2_labels"

    name1 = os.path.splitext(os.path.basename(mtz1_path))[0]
    name2 = os.path.splitext(os.path.basename(mtz2_path))[0]
    merged_mtz = os.path.abspath("{0}_{1}.mtz".format(name1, name2))

    cmd = ['cad', 'hklin1', mtz1_path, 'hklin2', mtz2_path, 'hklout', merged_mtz]

    labels = []  # labels of the merged file, in column order
    labin_lines = []  # one "LABIN FILE n ..." line per file that has labels
    labout_lines = []  # one "LABOUT FILE_NUMBER n ..." line per file with renames
    for fileno, file_labels in enumerate((mtz1_labels, mtz2_labels), start=1):
        labin = []
        labout = []
        for labelno, label in enumerate(file_labels, start=1):
            # See if the label is a duplicate and needs to be renamed
            newlabel = label + str(fileno) if label in labels else label
            assert newlabel not in labels, "Too many duplicate label names: {0}".format(newlabel)
            labels.append(newlabel)
            labin.append(" E{0}={1}".format(labelno, label))
            if newlabel != label:
                labout.append(" E{0}={1}".format(labelno, newlabel))
        if labin:
            labin_lines.append("LABIN FILE {0}".format(fileno) + "".join(labin))
        if labout:
            labout_lines.append("LABOUT FILE_NUMBER {0}".format(fileno) + "".join(labout))

    # Every keyword line is terminated by a newline; a LABOUT line is only
    # written when something was actually renamed.
    stdin = "".join(line + "\n" for line in labin_lines + labout_lines)

    logfile = os.path.abspath("cad.log")
    retcode = ample_util.run_command(cmd=cmd, stdin=stdin, logfile=logfile)
    if retcode != 0:
        raise RuntimeError("Error running command: {0}\nCheck logfile: {1}".format(" ".join(cmd), logfile))
    os.unlink(logfile)
    return merged_mtz, labels
def worker(inqueue, early_terminate=False, check_success=None):
    """Worker process to run MrBump jobs until no more left.

    This function keeps looping over the inqueue, removing jobs from the
    inqueue until there are no more left, and then exits the process with
    code 0 if every job returned 0 and 1 otherwise. With ``early_terminate``
    it exits with code 0 as soon as ``check_success`` reports that a job has
    succeeded.

    Each job is run from the directory its script lives in, logging to
    ``<script name without extension>.log``.

    Parameters
    ----------
    inqueue : :obj:`Queue`
        A Python Queue object holding the paths of the job scripts
    early_terminate : bool
        Terminate on first success or continue running
    check_success : callable
        A callable to check the success status of a job; called with the job
        path. Required when ``early_terminate`` is set.

    Raises
    ------
    SystemExit
        Always - this function ends by calling ``sys.exit``.

    Warnings
    --------
    This needs to import the main module that it lives in so maybe this should live in a separate module?

    """
    # Imported locally so that the module's import block is untouched
    try:
        from queue import Empty  # Python 3
    except ImportError:
        from Queue import Empty  # Python 2

    if early_terminate:
        assert callable(check_success)

    pname = multiprocessing.current_process().name
    success = True
    while True:
        # Fetch without blocking: a separate empty() test followed by a
        # blocking get() could hang forever if another worker takes the last
        # job in between the two calls.
        try:
            job = inqueue.get_nowait()
        except Empty:
            break

        # Got a script so run
        logger.debug("Worker {0} running job {1}".format(pname, job))

        # Get name from script
        directory, sname = os.path.split(job)
        jobname = os.path.splitext(sname)[0]

        # Change directory to the script directory (nothing to do for a bare
        # file name, where the directory part is empty)
        if directory:
            os.chdir(directory)
        retcode = ample_util.run_command([job], logfile=jobname + ".log", dolog=False, check=True)

        # Can we use the retcode to check?
        # REM - is retcode object
        if retcode != 0:
            logger.warning("WARNING! Worker {0} got retcode {1}".format(pname, retcode))
            success = False

        # Now check the result if early terminate
        if early_terminate and check_success(job):
            logger.debug("Worker {0} job succeeded".format(pname))
            sys.exit(0)

    logger.debug("worker {0} got empty inqueue".format(pname))
    sys.exit(0 if success else 1)
def merge_mtz(mtz1_path, mtz1_labels, mtz2_path, mtz2_labels):
    """Create MTZ file with columns from the given mtz files and mtz labels in each file.

    ``cad`` is run with ``LABIN`` lines selecting the requested labels from
    each input file. A label that clashes with one already taken from an
    earlier column is renamed by appending its file number (e.g. ``FP`` from
    the second file becomes ``FP2``) via a ``LABOUT`` line. The merged file is
    written to ``<name1>_<name2>.mtz`` in the current directory; the cad log
    is left in ``cad.log``.

    Parameters
    ----------
    mtz1_path : str
        Path to the first mtz file.
    mtz1_labels : list
        Column labels to take from the first file; must not contain duplicates.
    mtz2_path : str
        Path to the second mtz file.
    mtz2_labels : list
        Column labels to take from the second file; must not contain duplicates.

    Returns
    -------
    tuple
        ``(merged_mtz, labels)``: the absolute path of the merged file and the
        list of column labels in it (after any renaming).

    Raises
    ------
    AssertionError
        If a label list contains duplicates or a renamed label clashes with
        another label.
    RuntimeError
        If cad returns a non-zero exit code.
    """
    # Can't have any duplicates in file labels
    assert len(mtz1_labels) == len(
        set(mtz1_labels)), "Duplicate labels in mtz1_labels"
    assert len(mtz2_labels) == len(
        set(mtz2_labels)), "Duplicate labels in mtz2_labels"

    name1 = os.path.splitext(os.path.basename(mtz1_path))[0]
    name2 = os.path.splitext(os.path.basename(mtz2_path))[0]
    merged_mtz = os.path.abspath("{0}_{1}.mtz".format(name1, name2))

    cmd = [
        'cad', 'hklin1', mtz1_path, 'hklin2', mtz2_path, 'hklout', merged_mtz
    ]

    labels = []  # labels of the merged file, in column order
    labin_lines = []  # one "LABIN FILE n ..." line per file that has labels
    labout_lines = []  # one "LABOUT FILE_NUMBER n ..." line per file with renames
    for fileno, file_labels in enumerate((mtz1_labels, mtz2_labels), start=1):
        labin = []
        labout = []
        for labelno, label in enumerate(file_labels, start=1):
            # See if the label is a duplicate and needs to be renamed
            newlabel = label + str(fileno) if label in labels else label
            assert newlabel not in labels, "Too many duplicate label names: {0}".format(
                newlabel)
            labels.append(newlabel)
            labin.append(" E{0}={1}".format(labelno, label))
            if newlabel != label:
                labout.append(" E{0}={1}".format(labelno, newlabel))
        if labin:
            labin_lines.append("LABIN FILE {0}".format(fileno) + "".join(labin))
        if labout:
            labout_lines.append("LABOUT FILE_NUMBER {0}".format(fileno) + "".join(labout))

    # Every keyword line is terminated by a newline; a LABOUT line is only
    # written when something was actually renamed.
    stdin = "".join(line + "\n" for line in labin_lines + labout_lines)

    logfile = os.path.abspath("cad.log")
    retcode = ample_util.run_command(cmd=cmd, stdin=stdin, logfile=logfile)
    if retcode != 0:
        # Call syntax for raise works on both Python 2 and Python 3
        raise RuntimeError(
            "Error running command: {0}\nCheck logfile: {1}".format(
                " ".join(cmd), logfile))
    return merged_mtz, labels
def superpose_models(self, models, work_dir=None, basename='theseus', homologs=False, alignment_file=None):
    """Superpose models and return the ensemble.

    This also sets the `superposed_models`, `var_by_res`, `theseus_log` and
    `variance_log` attributes and, for homologs, `aligned_models`.

    Parameters
    ----------
    models : :obj:`list`
        List of pdb files to be superposed.
    work_dir : str
        The directory to run theseus in and generate all the output files
    basename : str
        The stem that will be used to name all files
    homologs : bool
        True if the pdbs are homologous models as opposed to ab initio ones
    alignment_file : str
        An externally generated alignment file for homologous models in FASTA format.
        If not given for homologs, one is generated with ``self.alignment_file``.

    Returns
    -------
    superposed_models : a pdb file containing an ensemble of the superposed models

    Raises
    ------
    RuntimeError
        If no models are given or theseus returns a non-zero exit code.
    """
    if not models:
        # Fail with a clear message before anything is set up or copied
        raise RuntimeError("superpose_models got no models to superpose!")

    self._set_work_dir(work_dir)
    if homologs:
        # Theseus expects all the models to be in the directory that it is run in as the string given in
        # the fasta header is used to construct the file names of the aligned pdb files. If a full or
        # relative path is given (e.g. /foo/bar.pdb), it tries to create files called "basename_/foo/bar.pdb"
        # We therefore copy the models in and then delete them afterwards
        if not alignment_file:
            alignment_file = self.alignment_file(models)
        copy_models = [os.path.join(self.work_dir, os.path.basename(m)) for m in models]
        for src, dest in zip(models, copy_models):
            shutil.copy(src, dest)
        models = copy_models

    # -Z included so we don't line the models up to the principle axis and -o so that they all line
    # up with the first model
    cmd = [self.theseus_exe, '-a0', '-r', basename, '-Z', '-o', os.path.basename(models[0])]
    if homologs:
        cmd += ['-A', alignment_file]
        cmd += [os.path.basename(m) for m in models]
    else:
        # Not sure why we had relpath - fails some of the tests so changing
        cmd += models

    self.theseus_log = os.path.join(self.work_dir, "tlog_{0}.log".format(basename))
    retcode = ample_util.run_command(cmd, logfile=self.theseus_log, directory=self.work_dir)
    if retcode != 0:
        raise RuntimeError(
            "non-zero return code for theseus in superpose_models!\n See log: {0}".format(self.theseus_log))

    self.variance_log = os.path.join(self.work_dir, '{0}_variances.txt'.format(basename))
    self.superposed_models = os.path.join(self.work_dir, '{0}_sup.pdb'.format(basename))
    if homologs:
        # Horrible - need to rename the models so that they match the names in the alignment file
        self.aligned_models = []
        for m in copy_models:
            mb = os.path.basename(m)
            aligned_model = os.path.join(self.work_dir, "{0}_{1}".format(basename, mb))
            # Replace the copied input with the aligned model under the original name
            os.unlink(m)
            os.rename(aligned_model, os.path.join(self.work_dir, mb))
            self.aligned_models.append(mb)

    # Set the variances
    self.var_by_res = self.parse_variances()
    return self.superposed_models