def preprocess(self):
    """
    Things to do before the main process runs.
    1. Change to the correct directory.
    2. Print out the reference for the Stat pipeline.
    """
    if self.verbose:
        self.logger.debug("LabelitPP::preprocess")
    Utils.folders(self)
    # Print out recognition of the program being used
    self.print_info()
def launchJob(inp2, k):
    """
    Launch Phaser job on cluster and pass back the process job and pid.
    """
    Utils.folders(self, k)
    # Remove leftover file if rerunning.
    if os.path.exists('adf.com'):
        os.system('rm -rf adf.com')
    queue = Queue()
    j = Process(target=RunPhaser, args=(inp2, queue, self.logger))
    j.start()
    queue.get()  # First item on the queue is the log, which is not used here
    if self.output['jobs'] is None:
        self.output['jobs'] = {j: k}
        self.output['pids'] = {k: queue.get()}
    else:
        self.output['jobs'].update({j: k})
        self.output['pids'].update({k: queue.get()})
    # Set up initial results for all running jobs.
    self.phaser_results[k] = {'AutoMR results': Parse.setPhaserFailed('Still running')}
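# --- Illustrative sketch (not part of the original pipeline) ---
# launchJob above calls queue.get() twice: the worker is assumed to put the
# Phaser log text on the queue first and its pid second. RunPhaser's real
# behavior is not shown in this section, so run_phaser_stub below is a
# hypothetical stand-in that only demonstrates that put-order contract.
from multiprocessing import Process, Queue
import os

def run_phaser_stub(inp, queue, logger=None):
    """Hypothetical stand-in for RunPhaser: put the log first, the pid second."""
    queue.put('phaser log text')  # first get(): log, discarded by launchJob
    queue.put(os.getpid())        # second get(): recorded as the job's pid

if __name__ == '__main__':
    queue = Queue()
    job = Process(target=run_phaser_stub, args=({'name': 'test'}, queue, None))
    job.start()
    _log = queue.get()  # unused, mirroring launchJob
    pid = queue.get()
    job.join()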
def Queue(self):
    """
    queue system.
    """
    if self.verbose:
        self.logger.debug('AutoMolRep::Queue')
    try:
        timed_out = False
        timer = 0
        jobs = self.output['jobs'].keys()
        # Set which jobs to watch.
        if self.run_before:
            jobs = [job for job in jobs if self.output['jobs'][job][-1] == '1']
        else:
            jobs = [job for job in jobs if self.output['jobs'][job][-1] == '0']
        counter = len(jobs)
        while counter != 0:
            # Iterate over a copy: the original code removed from (and
            # appended to) the list while iterating it, which skips entries.
            for job in list(jobs):
                if job.is_alive() == False:
                    jobs.remove(job)
                    if self.verbose:
                        self.logger.debug('Finished Phaser on %s' % self.output['jobs'][job])
                    Utils.folders(self, self.output['jobs'][job])
                    if self.adf:
                        if os.path.exists('adf.com'):
                            del self.output['pids'][self.output['jobs'][job]]
                            counter -= 1
                        else:
                            key = self.output['jobs'].pop(job)
                            p = self.postprocessPhaser(key)
                            if p == 'ADF':
                                # Calculate ADF map.
                                adf = Process(target=Utils.calcADF,
                                              name='ADF%s' % key,
                                              args=(self, key))
                                adf.start()
                                jobs.append(adf)
                                self.output['jobs'][adf] = key
                            else:
                                counter -= 1
                            self.postprocess(False)
                    else:
                        self.postprocessPhaser(self.output['jobs'][job])
                        del self.output['pids'][self.output['jobs'][job]]
                        self.postprocess(False)
                        counter -= 1
            time.sleep(0.2)
            timer += 0.2
            if self.verbose:
                number = round(timer % 1, 1)
                if number in (0.0, 1.0):
                    print 'Waiting for Phaser to finish %s seconds' % timer
            if self.phaser_timer:
                if timer >= self.phaser_timer:
                    timed_out = True
                    break
        if timed_out:
            self.logger.debug('Phaser timed out.')
            print 'Phaser timed out.'
            for job in jobs:
                self.phaser_results[self.output['jobs'][job]] = {
                    'AutoMR results': Parse.setPhaserFailed('Timed out')}
                if self.cluster_use:
                    #Utils.killChildrenCluster(self,self.output['pids'][self.output['jobs'][job]])
                    BLspec.killChildrenCluster(self, self.output['pids'][self.output['jobs'][job]])
                else:
                    Utils.killChildren(self, self.output['pids'][self.output['jobs'][job]])
        # Check if a solution has been found.
        if self.run_before == False:
            self.checkSolution()
        self.logger.debug('Phaser finished.')
    except:
        self.logger.exception('**ERROR in AutoMolRep.Queue**')
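# --- Illustrative sketch (not part of the original pipeline) ---
# Queue() above is one instance of a general polling pattern: watch a list of
# multiprocessing jobs, remove the ones that finish, sleep briefly between
# passes, and give up after an optional timeout. A minimal, self-contained
# version of that pattern (wait_for_jobs is a hypothetical helper name):
import time

def wait_for_jobs(jobs, timeout=None, poll=0.2):
    """Poll a list of multiprocessing.Process objects; True if all finished."""
    elapsed = 0.0
    while jobs:
        # Iterate over a copy so removal does not skip the next element.
        for job in list(jobs):
            if not job.is_alive():
                jobs.remove(job)
        time.sleep(poll)
        elapsed += poll
        if timeout is not None and elapsed >= timeout:
            return False  # timed out with jobs still running
    return True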
def run_queue(self):
    """
    queue system.
    """
    self.logger.debug("PDBQuery::run_queue")
    try:
        timed_out = False
        timer = 0
        if self.jobs != {}:
            jobs = self.jobs.keys()
            while len(jobs) != 0:
                # Iterate over a copy so finished jobs can be removed safely.
                for job in list(jobs):
                    if job.is_alive() == False:
                        jobs.remove(job)
                        code = self.jobs.pop(job)
                        xutils.folders(self, "Phaser_%s" % code)
                        new_jobs = []
                        if self.test == False:
                            del self.pids[code]
                        #if self.verbose:
                        self.logger.debug("Finished Phaser on %s" % code)
                        p = self.postprocess_phaser(code)
                        # if p.count("rigid"):
                        #     if os.path.exists("rigid.log") == False:
                        #         j = Process(target=self.process_refine, args=(code, ))
                        #         j.start()
                        #         new_jobs.append(j)
                        #         if self.test:
                        #             time.sleep(5)
                        # if p.count("ADF"):
                        #     if os.path.exists("adf.com") == False:
                        #         j = Process(target=xutils.calcADF, args=(self, code))
                        #         j.start()
                        #         new_jobs.append(j)
                        # if len(new_jobs) > 0:
                        #     for j1 in new_jobs:
                        #         self.jobs[j1] = code
                        #         jobs.append(j1)
                time.sleep(0.2)
                timer += 0.2
                if self.phaser_timer:
                    if timer >= self.phaser_timer:
                        timed_out = True
                        break
            if timed_out:
                for j in self.jobs.values():
                    if self.pids.has_key(j):
                        if self.cluster_use:
                            # TODO
                            # BLspec.killChildrenCluster(self, self.pids[j])
                            pass
                        else:
                            xutils.killChildren(self, self.pids[j])
                    if self.phaser_results.has_key(j) == False:
                        self.phaser_results[j] = {"AutoMR results": Parse.setPhaserFailed("Timed out")}
                if self.verbose:
                    self.logger.debug("PDBQuery timed out.")
                    print "PDBQuery timed out."
        if self.verbose:
            self.logger.debug("PDBQuery.run_queue finished.")
    except:
        self.logger.exception("**ERROR in PDBQuery.run_queue**")
def process_phaser(self):
    """Start Phaser for input pdb"""

    self.logger.debug("process_phaser")
    self.tprint("\nStarting molecular replacement", level=30, color="blue")

    # POOL = Pool(processes=4)
    #
    # def launch_job(inp):
    #     """Run a phaser process and retrieve results"""
    #     # print "launch_job", inp
    #     queue = Queue()
    #     result = POOL.apply_async(phaser_func, (inp, queue, self.logger))
    #     # queue = Queue()
    #     # job = Process(target=RunPhaser, args=(inp, queue, self.logger))
    #     # job.start()
    #     # Get results
    #     # queue.get()  # For the log I don't use
    #     # self.jobs[job] = inp["name"]
    #     # self.pids[inp["name"]] = queue.get()

    # Run through the pdbs
    self.tprint(" Assembling Phaser runs", level=10, color="white")
    commands = []
    for code in self.cell_output.keys():
        self.tprint(" %s" % code, level=30, color="white")
        l = False
        copy = 1

        # Create directory for MR
        xutils.create_folders(self.working_dir, "Phaser_%s" % code)

        # The cif file name
        cif_file = os.path.basename(self.cell_output[code].get("path"))
        gzip_file = cif_file + ".gz"
        cached_file = False

        # Is the cif file in the local cache?
        if self.cif_cache:
            cached_file = os.path.join(self.cif_cache, gzip_file)
            if os.path.exists(cached_file):
                self.tprint(" Have cached cif file %s" % gzip_file, level=10, color="white")
            else:
                # Get the gzipped cif file from the PDBQ server
                self.tprint(" Fetching %s" % cif_file, level=10, color="white")
                try:
                    response = urllib2.urlopen(
                        urllib2.Request("%s/entry/get_cif/%s" %
                                        (PDBQ_SERVER, cif_file.replace(".cif", ""))),
                        timeout=60).read()
                except urllib2.HTTPError as http_error:
                    self.tprint(" %s when fetching %s" % (http_error, cif_file),
                                level=50, color="red")
                    continue
                # Write the gzip file
                with open(cached_file, "wb") as outfile:
                    outfile.write(response)
            # Copy the gzip file to the cwd
            shutil.copy(cached_file, os.path.join(os.getcwd(), gzip_file))

        # No local CIF file cache
        else:
            # Get the gzipped cif file from the PDBQ server
            self.tprint(" Fetching %s" % cif_file, level=10, color="white")
            try:
                response = urllib2.urlopen(
                    urllib2.Request("%s/entry/get_cif/%s" %
                                    (PDBQ_SERVER, cif_file.replace(".cif", ""))),
                    timeout=60).read()
            except urllib2.HTTPError as http_error:
                self.tprint(" %s when fetching %s" % (http_error, cif_file),
                            level=50, color="red")
                continue
            # Write the gzip file
            with open(gzip_file, "wb") as outfile:
                outfile.write(response)

        # Uncompress the gzipped file
        unzip_proc = subprocess.Popen(["gunzip", gzip_file])
        unzip_proc.wait()

        # If mmCIF, checks if file exists or if it is super structure with
        # multiple PDB codes, and returns False, otherwise sends back SG.
        sg_pdb = xutils.fix_spacegroup(xutils.get_spacegroup_info(cif_file))

        # Remove codes that won't run or PDB/mmCIFs that don't exist.
        if sg_pdb == False:
            del self.cell_output[code]
            continue

        # Convert from cif to pdb
        conversion_proc = subprocess.Popen(["phenix.cif_as_pdb", cif_file],
                                           stdout=subprocess.PIPE,
                                           stderr=subprocess.PIPE)
        conversion_proc.wait()
        cif_file = cif_file.replace(".cif", ".pdb")

        # Now check all SGs
        sg_num = xutils.convert_spacegroup(sg_pdb)
        lg_pdb = xutils.get_sub_groups(sg_num, "simple")
        self.tprint(" %s spacegroup: %s (%s)" % (cif_file, sg_pdb, sg_num),
                    level=10, color="white")
        self.tprint(" subgroups: %s" % str(lg_pdb), level=10, color="white")

        # SG from data
        data_spacegroup = xutils.convert_spacegroup(self.laue, True)
        # self.tprint(" Data spacegroup: %s" % data_spacegroup, level=10, color="white")

        # Fewer mols in AU or in self.common.
        if code in self.common or float(self.laue) > float(lg_pdb):
            # If SM is lower sym, which will cause problems, since PDB is too big.
            # Need full path for copying pdb files to folders.
            pdb_info = xutils.get_pdb_info(os.path.join(os.getcwd(), cif_file),
                                           dres=self.dres,
                                           matthews=True,
                                           cell_analysis=False,
                                           data_file=self.datafile)

            # Prune if only one chain present, b/c "all" and "A" will be the same.
            if len(pdb_info.keys()) == 2:
                for key in pdb_info.keys():
                    if key != "all":
                        del pdb_info[key]
            copy = pdb_info["all"]["NMol"]
            if copy == 0:
                copy = 1
            # If pdb_info["all"]["res"] == 0.0:
            if pdb_info["all"]["SC"] < 0.2:
                # Only run on chains that will fit in the AU.
                l = [chain for chain in pdb_info.keys()
                     if pdb_info[chain]["res"] != 0.0]

        # More mols in AU
        elif float(self.laue) < float(lg_pdb):
            pdb_info = xutils.get_pdb_info(cif_file=cif_file,
                                           dres=self.dres,
                                           matthews=True,
                                           cell_analysis=True,
                                           data_file=self.datafile)
            copy = pdb_info["all"]["NMol"]

        # Same number of mols in AU.
        else:
            pdb_info = xutils.get_pdb_info(cif_file=cif_file,
                                           dres=self.dres,
                                           matthews=False,
                                           cell_analysis=True,
                                           data_file=self.datafile)

        job_description = {
            "work_dir": os.path.abspath(os.path.join(self.working_dir, "Phaser_%s" % code)),
            "data": self.datafile,
            "pdb": cif_file,
            "name": code,
            "verbose": self.verbose,
            "sg": data_spacegroup,
            "copy": copy,
            "test": self.test,
            "cluster": self.cluster_use,
            "cell analysis": True,
            "large": self.large_cell,
            "res": xutils.set_phaser_res(pdb_info["all"]["res"], self.large_cell, self.dres),
            "timeout": self.phaser_timer}

        if not l:
            commands.append(job_description)
        else:
            for chain in l:
                new_code = "%s_%s" % (code, chain)
                xutils.folders(self, "Phaser_%s" % new_code)
                # Copy before updating so each chain gets its own dict;
                # appending the same mutated dict would leave every entry
                # holding the last chain's values.
                chain_description = job_description.copy()
                chain_description.update({
                    "work_dir": os.path.abspath(os.path.join(self.working_dir,
                                                             "Phaser_%s" % new_code)),
                    "pdb": pdb_info[chain]["file"],
                    "name": new_code,
                    "copy": pdb_info[chain]["NMol"],
                    "res": xutils.set_phaser_res(pdb_info[chain]["res"],
                                                 self.large_cell, self.dres)})
                commands.append(chain_description)

    # pprint(commands)
    # phaser_results = []
    # for command in commands:
    #     phaser_results.append(phaser_func(command))

    # Run in pool
    pool = Pool(2)
    self.tprint(" Initiating Phaser runs", level=10, color="white")
    results = pool.map_async(phaser_func, commands)
    pool.close()
    pool.join()
    phaser_results = results.get()

    return phaser_results
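# --- Illustrative sketch (not part of the original pipeline) ---
# process_phaser above ends with a multiprocessing.Pool fan-out. The core of
# that pattern, isolated: map_async distributes the command dicts across the
# workers, close() stops new submissions, join() waits, and get() collects
# the returned results. phaser_stub is a hypothetical stand-in for phaser_func.
from multiprocessing import Pool

def phaser_stub(command):
    """Hypothetical stand-in: pretend to run Phaser on one command dict."""
    return {'name': command['name'], 'solution': False}

if __name__ == '__main__':
    commands = [{'name': '1ABC'}, {'name': '2XYZ'}]
    pool = Pool(2)
    results = pool.map_async(phaser_stub, commands)
    pool.close()
    pool.join()
    phaser_results = results.get()  # list of dicts, one per command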
def process_phaser(self):
    """Start Phaser for input pdb"""

    self.logger.debug("process_phaser")
    self.tprint("\nStarting molecular replacement", level=30, color="blue")
    self.tprint(" Assembling Phaser runs", level=10, color="white")

    def launch_job(inp):
        """Launch the Phaser job"""
        #self.logger.debug("process_phaser Launching %s" % inp['name'])

        tag = 'Phaser_%d' % random.randint(0, 10000)
        if self.computer_cluster:
            # Create a unique identifier for Phaser results
            inp['tag'] = tag
            # Send Redis settings so results can be sent thru redis
            #inp['db_settings'] = self.site.CONTROL_DATABASE_SETTINGS
            # Don't need result queue since results will be sent via Redis
            queue = False
        else:
            inp['pool'] = self.pool
            # Add result queue
            queue = self.manager.Queue()
            inp['result_queue'] = queue
        #if self.pool:
        #    inp['pool'] = self.pool
        #else:
        #    inp['tag'] = tag
        #job, pid, tag = run_phaser(**inp)
        job, pid = run_phaser(**inp)
        self.jobs[job] = {
            'name': inp['name'],
            'pid': pid,
            'tag': tag,
            'result_queue': queue,
            'spacegroup': inp['spacegroup']  # Needed for jobs that time out.
        }

    # Run through the pdbs
    for pdb_code in self.cell_output.keys():
        self.tprint(" %s" % pdb_code, level=30, color="white")
        l = False
        copy = 1

        # Create directory for MR
        xutils.create_folders(self.working_dir, "Phaser_%s" % pdb_code)
        cif_file = pdb_code.lower() + ".cif"

        # Get the structure file
        if self.test and os.path.exists(cif_file):
            cif_path = os.path.join(os.getcwd(), cif_file)
        else:
            cif_path = self.repository.download_cif(
                pdb_code, os.path.join(os.getcwd(), cif_file))
        if not cif_path:
            self.postprocess_invalid_code(pdb_code)
        else:
            # If mmCIF, checks if file exists or if it is super structure with
            # multiple PDB codes, and returns False, otherwise sends back SG.
            spacegroup_pdb = xutils.fix_spacegroup(get_spacegroup_info(cif_path))
            if not spacegroup_pdb:
                del self.cell_output[pdb_code]
                continue

            # Now check all SGs
            spacegroup_num = xutils.convert_spacegroup(spacegroup_pdb)
            lg_pdb = xutils.get_sub_groups(spacegroup_num, "laue")
            self.tprint(" %s spacegroup: %s (%s)" %
                        (cif_path, spacegroup_pdb, spacegroup_num),
                        level=10, color="white")
            self.tprint(" subgroups: %s" % str(lg_pdb), level=10, color="white")

            # SG from data
            data_spacegroup = xutils.convert_spacegroup(self.laue, True)
            # self.tprint(" Data spacegroup: %s" % data_spacegroup, level=10, color="white")

            # Fewer mols in AU or in common contaminants.
            if pdb_code in self.common_contaminants or float(self.laue) > float(lg_pdb):
                # If SM is lower sym, which will cause problems, since PDB is too big.
                pdb_info = get_pdb_info(struct_file=cif_path,
                                        data_file=self.data_file,
                                        dres=self.dres,
                                        matthews=True,
                                        chains=True)

                # Prune if only one chain present, b/c "all" and "A" will be the same.
                if len(pdb_info.keys()) == 2:
                    for key in pdb_info.keys():
                        if key != "all":
                            del pdb_info[key]
                copy = pdb_info["all"]["NMol"]
                if copy == 0:
                    copy = 1
                # If pdb_info["all"]["res"] == 0.0:
                if pdb_info["all"]["SC"] < 0.2:
                    # Only run on chains that will fit in the AU.
                    l = [chain for chain in pdb_info.keys()
                         if pdb_info[chain]["res"] != 0.0]

            # More mols in AU
            elif float(self.laue) < float(lg_pdb):
                pdb_info = get_pdb_info(struct_file=cif_path,
                                        data_file=self.data_file,
                                        dres=self.dres,
                                        matthews=True,
                                        chains=False)
                copy = pdb_info["all"]["NMol"]

            # Same number of mols in AU.
            else:
                pdb_info = get_pdb_info(struct_file=cif_path,
                                        data_file=self.data_file,
                                        dres=self.dres,
                                        matthews=False,
                                        chains=False)

            job_description = {
                "work_dir": os.path.abspath(os.path.join(self.working_dir,
                                                         "Phaser_%s" % pdb_code)),
                "data_file": self.data_file,
                "struct_file": cif_path,
                "name": pdb_code,
                "spacegroup": data_spacegroup,
                "ncopy": copy,
                #"test": self.test,
                "cell_analysis": True,
                #"large_cell": self.large_cell,
                "resolution": xutils.set_phaser_res(pdb_info["all"]["res"],
                                                    self.large_cell,
                                                    self.dres),
                "launcher": self.launcher,
                "db_settings": self.db_settings,
                "tag": False,
                "batch_queue": self.batch_queue,
                "rapd_python": self.rapd_python}

            if not l:
                launch_job(job_description)
            else:
                for chain in l:
                    new_code = "%s_%s" % (pdb_code, chain)
                    xutils.folders(self, "Phaser_%s" % new_code)
                    job_description.update({
                        "work_dir": os.path.abspath(os.path.join(self.working_dir,
                                                                 "Phaser_%s" % new_code)),
                        "struct_file": pdb_info[chain]["file"],
                        "name": new_code,
                        "ncopy": pdb_info[chain]["NMol"],
                        "resolution": xutils.set_phaser_res(pdb_info[chain]["res"],
                                                            self.large_cell,
                                                            self.dres)})
                    launch_job(job_description)
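# --- Illustrative sketch (not part of the original pipeline) ---
# In the non-cluster branch of launch_job above, run_phaser is handed a
# Manager-backed result queue that the parent can read after the job ends.
# A minimal version of that channel; run_phaser_local_stub is a hypothetical
# stand-in for the real run_phaser, whose put-order is an assumption here.
from multiprocessing import Manager, Process

def run_phaser_local_stub(name=None, result_queue=None, **kwargs):
    """Hypothetical stand-in: push one result dict back to the parent."""
    result_queue.put({'name': name, 'solution': False})

if __name__ == '__main__':
    manager = Manager()
    queue = manager.Queue()
    job = Process(target=run_phaser_local_stub,
                  kwargs={'name': '1ABC', 'result_queue': queue})
    job.start()
    job.join()
    result = queue.get()  # read once the job is no longer alive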
def process_phaser(self):
    """Start Phaser for input pdb"""

    self.logger.debug("process_phaser")
    self.tprint("\nStarting molecular replacement", level=30, color="blue")
    self.tprint(" Assembling Phaser runs", level=10, color="white")

    def launch_job(inp):
        """Launch the Phaser job"""
        #self.logger.debug("process_phaser Launching %s" % inp['name'])
        if self.pool:
            inp['pool'] = self.pool
        job, pid, output_id = run_phaser(**inp)
        self.jobs[job] = {'name': inp['name'],
                          'pid': pid,
                          'output_id': output_id}

    # Run through the pdbs
    for pdb_code in self.cell_output.keys():
        self.tprint(" %s" % pdb_code, level=30, color="white")
        l = False
        copy = 1

        # Create directory for MR
        xutils.create_folders(self.working_dir, "Phaser_%s" % pdb_code)
        cif_file = pdb_code.lower() + ".cif"

        # Get the structure file
        if self.test and os.path.exists(cif_file):
            cif_path = os.path.join(os.getcwd(), cif_file)
        else:
            cif_path = self.repository.download_cif(
                pdb_code, os.path.join(os.getcwd(), cif_file))
        if not cif_path:
            self.postprocess_invalid_code(pdb_code)
        else:
            # If mmCIF, checks if file exists or if it is super structure with
            # multiple PDB codes, and returns False, otherwise sends back SG.
            spacegroup_pdb = xutils.fix_spacegroup(get_spacegroup_info(cif_path))
            if not spacegroup_pdb:
                del self.cell_output[pdb_code]
                # Bug fix: without this continue the code below would run
                # with spacegroup_pdb == False (cf. the variant above).
                continue

            # Now check all SGs
            spacegroup_num = xutils.convert_spacegroup(spacegroup_pdb)
            lg_pdb = xutils.get_sub_groups(spacegroup_num, "laue")
            self.tprint(" %s spacegroup: %s (%s)" %
                        (cif_path, spacegroup_pdb, spacegroup_num),
                        level=10, color="white")
            self.tprint(" subgroups: %s" % str(lg_pdb), level=10, color="white")

            # SG from data
            data_spacegroup = xutils.convert_spacegroup(self.laue, True)
            # self.tprint(" Data spacegroup: %s" % data_spacegroup, level=10, color="white")

            # Fewer mols in AU or in common contaminants.
            if pdb_code in self.common_contaminants or float(self.laue) > float(lg_pdb):
                # If SM is lower sym, which will cause problems, since PDB is too big.
                pdb_info = get_pdb_info(cif_file=cif_path,
                                        data_file=self.datafile,
                                        dres=self.dres,
                                        matthews=True,
                                        chains=True)

                # Prune if only one chain present, b/c "all" and "A" will be the same.
                if len(pdb_info.keys()) == 2:
                    for key in pdb_info.keys():
                        if key != "all":
                            del pdb_info[key]
                copy = pdb_info["all"]["NMol"]
                if copy == 0:
                    copy = 1
                # If pdb_info["all"]["res"] == 0.0:
                if pdb_info["all"]["SC"] < 0.2:
                    # Only run on chains that will fit in the AU.
                    l = [chain for chain in pdb_info.keys()
                         if pdb_info[chain]["res"] != 0.0]

            # More mols in AU
            elif float(self.laue) < float(lg_pdb):
                pdb_info = get_pdb_info(cif_file=cif_path,
                                        data_file=self.datafile,
                                        dres=self.dres,
                                        matthews=True,
                                        chains=False)
                copy = pdb_info["all"]["NMol"]

            # Same number of mols in AU.
            else:
                pdb_info = get_pdb_info(cif_file=cif_path,
                                        data_file=self.datafile,
                                        dres=self.dres,
                                        matthews=False,
                                        chains=False)

            job_description = {
                "work_dir": os.path.abspath(os.path.join(self.working_dir,
                                                         "Phaser_%s" % pdb_code)),
                "datafile": self.datafile,
                "cif": cif_path,
                #"pdb": cif_path,
                "name": pdb_code,
                "spacegroup": data_spacegroup,
                "ncopy": copy,
                "test": self.test,
                "cell_analysis": True,
                "large_cell": self.large_cell,
                "resolution": xutils.set_phaser_res(pdb_info["all"]["res"],
                                                    self.large_cell,
                                                    self.dres),
                "launcher": self.launcher,
                "db_settings": self.db_settings,
                "output_id": False,
                "batch_queue": self.batch_queue}

            if not l:
                launch_job(job_description)
            else:
                for chain in l:
                    new_code = "%s_%s" % (pdb_code, chain)
                    xutils.folders(self, "Phaser_%s" % new_code)
                    job_description.update({
                        "work_dir": os.path.abspath(os.path.join(self.working_dir,
                                                                 "Phaser_%s" % new_code)),
                        "cif": pdb_info[chain]["file"],
                        #"pdb": pdb_info[chain]["file"],
                        "name": new_code,
                        "ncopy": pdb_info[chain]["NMol"],
                        "resolution": xutils.set_phaser_res(pdb_info[chain]["res"],
                                                            self.large_cell,
                                                            self.dres)})
                    launch_job(job_description)
def Queue(self):
    """
    queue system.
    """
    if self.verbose:
        self.logger.debug('PDBQuery::Queue')
    try:
        timed_out = False
        timer = 0
        if self.jobs != {}:
            jobs = self.jobs.keys()
            while len(jobs) != 0:
                # Iterate over a copy so jobs can be removed/added safely.
                for job in list(jobs):
                    if job.is_alive() == False:
                        jobs.remove(job)
                        code = self.jobs.pop(job)
                        Utils.folders(self, 'Phaser_%s' % code)
                        new_jobs = []
                        if self.test == False:
                            del self.pids[code]
                        #if self.verbose:
                        self.logger.debug('Finished Phaser on %s' % code)
                        p = self.postprocessPhaser(code)
                        if p.count('rigid'):
                            if os.path.exists('rigid.log') == False:
                                j = Process(target=self.processRefine, args=(code,))
                                j.start()
                                new_jobs.append(j)
                                if self.test:
                                    time.sleep(5)
                        if p.count('ADF'):
                            if os.path.exists('adf.com') == False:
                                j = Process(target=Utils.calcADF, args=(self, code))
                                j.start()
                                new_jobs.append(j)
                        if len(new_jobs) > 0:
                            for j1 in new_jobs:
                                self.jobs[j1] = code
                                jobs.append(j1)
                time.sleep(0.2)
                timer += 0.2
                if self.phaser_timer:
                    if timer >= self.phaser_timer:
                        timed_out = True
                        break
            if timed_out:
                for j in self.jobs.values():
                    if self.pids.has_key(j):
                        if self.cluster_use:
                            BLspec.killChildrenCluster(self, self.pids[j])
                        else:
                            Utils.killChildren(self, self.pids[j])
                    if self.phaser_results.has_key(j) == False:
                        self.phaser_results[j] = {'AutoMR results': Parse.setPhaserFailed('Timed out')}
                if self.verbose:
                    self.logger.debug('PDBQuery timed out.')
                    print 'PDBQuery timed out.'
        if self.verbose:
            self.logger.debug('PDBQuery.Queue finished.')
    except:
        self.logger.exception('**ERROR in PDBQuery.Queue**')
def processPhaser(self):
    """
    Start Phaser for input pdb.
    """
    if self.verbose:
        self.logger.debug('PDBQuery::processPhaser')

    def launchJob(inp):
        queue = Queue()
        job = Process(target=RunPhaser, args=(inp, queue, self.logger))
        job.start()
        queue.get()  # First item on the queue is the log, which is not used
        self.jobs[job] = inp['name']
        self.pids[inp['name']] = queue.get()

    try:
        for code in self.cell_output.keys():
        #for code in ['4ER2']:
            l = False
            copy = 1
            Utils.folders(self, 'Phaser_%s' % code)
            f = os.path.basename(self.cell_output[code].get('path'))
            # Check if symlink exists and create if not.
            if os.path.exists(f) == False:
                os.symlink(self.cell_output[code].get('path'), f)
            # If mmCIF, checks if file exists or if it is super structure with
            # multiple PDB codes, and returns False, otherwise sends back SG.
            sg_pdb = Utils.fixSG(self, Utils.getSGInfo(self, f))
            # Remove codes that won't run or PDB/mmCIFs that don't exist.
            if sg_pdb == False:
                del self.cell_output[code]
                continue
            # Now check all SGs.
            lg_pdb = Utils.subGroups(self, Utils.convertSG(self, sg_pdb), 'simple')
            # SG from data
            sg = Utils.convertSG(self, self.laue, True)
            # Fewer mols in AU or in self.common.
            if code in self.common or float(self.laue) > float(lg_pdb):
                # If SM is lower sym, which will cause problems, since PDB is too big.
                # Need full path for copying pdb files to folders.
                pdb_info = Utils.getPDBInfo(self, os.path.join(os.getcwd(), f))
                # Prune if only one chain present, b/c 'all' and 'A' will be the same.
                if len(pdb_info.keys()) == 2:
                    for key in pdb_info.keys():
                        if key != 'all':
                            del pdb_info[key]
                copy = pdb_info['all']['NMol']
                if copy == 0:
                    copy = 1
                #if pdb_info['all']['res'] == 0.0:
                if pdb_info['all']['SC'] < 0.2:
                    # Only run on chains that will fit in the AU.
                    l = [chain for chain in pdb_info.keys() if pdb_info[chain]['res'] != 0.0]
            # More mols in AU
            elif float(self.laue) < float(lg_pdb):
                pdb_info = Utils.getPDBInfo(self, f, True, True)
                copy = pdb_info['all']['NMol']
            # Same number of mols in AU.
            else:
                pdb_info = Utils.getPDBInfo(self, f, False, True)
            d = {'data': self.datafile,
                 'pdb': f,
                 'name': code,
                 'verbose': self.verbose,
                 'sg': sg,
                 'copy': copy,
                 'test': self.test,
                 'cluster': self.cluster_use,
                 'cell analysis': True,
                 'large': self.large_cell,
                 'res': Utils.setPhaserRes(self, pdb_info['all']['res']),
                 }
            if l == False:
                launchJob(d)
            else:
                for chain in l:
                    new_code = '%s_%s' % (code, chain)
                    Utils.folders(self, 'Phaser_%s' % new_code)
                    d.update({'pdb': pdb_info[chain]['file'],
                              'name': new_code,
                              'copy': pdb_info[chain]['NMol'],
                              'res': Utils.setPhaserRes(self, pdb_info[chain]['res'])})
                    launchJob(d)
    except:
        self.logger.exception('**ERROR in PDBQuery.processPhaser**')
def run_queue(self):
    """
    queue system.
    """
    if self.verbose:
        self.logger.debug("PDBQuery::run_queue")
    try:
        timed_out = False
        timer = 0
        if self.jobs != {}:
            jobs = self.jobs.keys()
            while len(jobs) != 0:
                # Iterate over a copy so jobs can be removed/added safely.
                for job in list(jobs):
                    if job.is_alive() == False:
                        jobs.remove(job)
                        code = self.jobs.pop(job)
                        Utils.folders(self, "Phaser_%s" % code)
                        new_jobs = []
                        if self.test == False:
                            del self.pids[code]
                        #if self.verbose:
                        self.logger.debug("Finished Phaser on %s" % code)
                        p = self.postprocess_phaser(code)
                        if p.count("rigid"):
                            if os.path.exists("rigid.log") == False:
                                j = Process(target=self.process_refine, args=(code, ))
                                j.start()
                                new_jobs.append(j)
                                if self.test:
                                    time.sleep(5)
                        if p.count("ADF"):
                            if os.path.exists("adf.com") == False:
                                j = Process(target=Utils.calcADF, args=(self, code))
                                j.start()
                                new_jobs.append(j)
                        if len(new_jobs) > 0:
                            for j1 in new_jobs:
                                self.jobs[j1] = code
                                jobs.append(j1)
                time.sleep(0.2)
                timer += 0.2
                if self.phaser_timer:
                    if timer >= self.phaser_timer:
                        timed_out = True
                        break
            if timed_out:
                for j in self.jobs.values():
                    if self.pids.has_key(j):
                        if self.cluster_use:
                            # TODO
                            # BLspec.killChildrenCluster(self, self.pids[j])
                            pass
                        else:
                            Utils.killChildren(self, self.pids[j])
                    if self.phaser_results.has_key(j) == False:
                        self.phaser_results[j] = {"AutoMR results": Parse.setPhaserFailed("Timed out")}
                if self.verbose:
                    self.logger.debug("PDBQuery timed out.")
                    print "PDBQuery timed out."
        if self.verbose:
            self.logger.debug("PDBQuery.run_queue finished.")
    except:
        self.logger.exception("**ERROR in PDBQuery.run_queue**")
def process_phaser(self):
    """
    Start Phaser for input pdb.
    """
    if self.verbose:
        self.logger.debug("PDBQuery::process_phaser")

    def launch_job(inp):
        queue = Queue()
        job = Process(target=RunPhaser, args=(inp, queue, self.logger))
        job.start()
        queue.get()  # First item on the queue is the log, which is not used
        self.jobs[job] = inp["name"]
        self.pids[inp["name"]] = queue.get()

    try:
        for code in self.cell_output.keys():
        #for code in ["4ER2"]:
            l = False
            copy = 1
            Utils.folders(self, "Phaser_%s" % code)
            f = os.path.basename(self.cell_output[code].get("path"))
            # Check if symlink exists and create if not.
            if os.path.exists(f) == False:
                os.symlink(self.cell_output[code].get("path"), f)
            # If mmCIF, checks if file exists or if it is super structure with
            # multiple PDB codes, and returns False, otherwise sends back SG.
            sg_pdb = Utils.fixSG(self, Utils.getSGInfo(self, f))
            # Remove codes that won't run or PDB/mmCIFs that don't exist.
            if sg_pdb == False:
                del self.cell_output[code]
                continue
            # Now check all SGs.
            lg_pdb = Utils.subGroups(self, Utils.convertSG(self, sg_pdb), "simple")
            # SG from data
            sg = Utils.convertSG(self, self.laue, True)
            # Fewer mols in AU or in self.common.
            if code in self.common or float(self.laue) > float(lg_pdb):
                # If SM is lower sym, which will cause problems, since PDB is too big.
                # Need full path for copying pdb files to folders.
                pdb_info = Utils.getPDBInfo(self, os.path.join(os.getcwd(), f))
                # Prune if only one chain present, b/c "all" and "A" will be the same.
                if len(pdb_info.keys()) == 2:
                    for key in pdb_info.keys():
                        if key != "all":
                            del pdb_info[key]
                copy = pdb_info["all"]["NMol"]
                if copy == 0:
                    copy = 1
                # If pdb_info["all"]["res"] == 0.0:
                if pdb_info["all"]["SC"] < 0.2:
                    # Only run on chains that will fit in the AU.
                    l = [chain for chain in pdb_info.keys() if pdb_info[chain]["res"] != 0.0]
            # More mols in AU
            elif float(self.laue) < float(lg_pdb):
                pdb_info = Utils.getPDBInfo(self, f, True, True)
                copy = pdb_info["all"]["NMol"]
            # Same number of mols in AU.
            else:
                pdb_info = Utils.getPDBInfo(self, f, False, True)
            d = {"data": self.datafile,
                 "pdb": f,
                 "name": code,
                 "verbose": self.verbose,
                 "sg": sg,
                 "copy": copy,
                 "test": self.test,
                 "cluster": self.cluster_use,
                 "cell analysis": True,
                 "large": self.large_cell,
                 "res": Utils.setPhaserRes(self, pdb_info["all"]["res"]),
                 }
            if l == False:
                launch_job(d)
            else:
                for chain in l:
                    new_code = "%s_%s" % (code, chain)
                    Utils.folders(self, "Phaser_%s" % new_code)
                    d.update({"pdb": pdb_info[chain]["file"],
                              "name": new_code,
                              "copy": pdb_info[chain]["NMol"],
                              "res": Utils.setPhaserRes(self, pdb_info[chain]["res"])})
                    launch_job(d)
    except:
        self.logger.exception("**ERROR in PDBQuery.process_phaser**")