Beispiel #1
0
 def preprocess(self):
     """
     Things to do before the main process runs.

     1. Change to the working directory given by self.setup['work']
        (created if it does not exist).
     2. Get space group, unit cell and volume info from the data and flag
        large cells (self.large_cell).
     3. Resolve self.input_pdb: a 4-character value is passed to
        Utils.getmmCIF, a '.pdb' path is copied into the working directory.
     4. Collect resolution (self.dres) and model info (self.pdb_info).
     5. Print out the reference for MR pipeline.

     self.postprocess(True) is called whenever the model cannot be resolved
     or its info cannot be read.
     NOTE(review): this method keeps running after self.postprocess(True)
     returns; presumably postprocess terminates the run -- confirm.
     """
     if self.verbose:
         self.logger.debug('AutoMolRep::preprocess')
     #change directory to the one specified in the incoming dict
     self.working_dir = self.setup.get('work')
     if not os.path.exists(self.working_dir):
         os.makedirs(self.working_dir)
     os.chdir(self.working_dir)
     #Convert SCA to MTZ (if needed) and get unit cell and SG info and convert R3/H3
     self.input_sg, self.cell, self.cell2, vol = Utils.getMTZInfo(
         self, False, True, True)
     #Needed for running in different queues for more memory..
     if Utils.calcResNumber(self, self.input_sg, False, vol) > 5000:
         self.large_cell = True
     #Check if input_pdb is pdb file or pdbID.
     if self.input_pdb[-4:].upper() != '.PDB':
         if len(self.input_pdb) == 4:
             self.input_pdb = Utils.getmmCIF(self, self.input_pdb)
         else:
             #Neither a '.pdb' path nor a 4-character ID.
             self.postprocess(True)
     else:
         #Have to copy into working_dir to limit path length in Phaser
         try:
             shutil.copy(self.input_pdb, os.getcwd())
         except (IOError, OSError, shutil.Error):
             #Only copy failures are tolerated here (e.g. source and
             #destination are the same file); anything else propagates.
             if self.verbose:
                 self.logger.debug(
                     'Cannot copy input pdb to working dir. Probably already there.'
                 )
         self.input_pdb = os.path.join(os.getcwd(),
                                       os.path.basename(self.input_pdb))
     #Check if pdb exists...
     if self.input_pdb == False:
         self.postprocess(True)
     else:
         self.dres = Utils.getRes(self)
         self.pdb_info = Utils.getPDBInfo(self, self.input_pdb)
         if self.pdb_info == False:
             self.postprocess(True)
         #If user requests to search for more mols, then allow.
         #Guarded with elif so a False pdb_info is never indexed.
         elif self.nmol:
             if int(self.pdb_info['all'].get('NMol')) < int(self.nmol):
                 self.pdb_info['all']['NMol'] = self.nmol
     if self.test:
         self.logger.debug('TEST IS SET "ON"')
     #print out recognition of the program being used
     self.PrintInfo()
Beispiel #2
0
 def preprocess(self):
   """
   Things to do before the main process runs.

   1. Change to the working directory given by self.setup['work']
      (created if it does not exist).
   2. Get space group, unit cell and volume info from the data and flag
      large cells (self.large_cell).
   3. Resolve self.input_pdb: a 4-character value is passed to
      Utils.getmmCIF, a '.pdb' path is copied into the working directory.
   4. Collect resolution (self.dres) and model info (self.pdb_info).
   5. Print out the reference for MR pipeline.

   self.postprocess(True) is called whenever the model cannot be resolved
   or its info cannot be read.
   NOTE(review): this method keeps running after self.postprocess(True)
   returns; presumably postprocess terminates the run -- confirm.
   """
   if self.verbose:
     self.logger.debug('AutoMolRep::preprocess')
   #change directory to the one specified in the incoming dict
   self.working_dir = self.setup.get('work')
   if not os.path.exists(self.working_dir):
     os.makedirs(self.working_dir)
   os.chdir(self.working_dir)
   #Convert SCA to MTZ (if needed) and get unit cell and SG info and convert R3/H3
   self.input_sg,self.cell,self.cell2,vol = Utils.getMTZInfo(self,False,True,True)
   #Needed for running in different queues for more memory..
   if Utils.calcResNumber(self,self.input_sg,False,vol) > 5000:
     self.large_cell = True
   #Check if input_pdb is pdb file or pdbID.
   if self.input_pdb[-4:].upper() != '.PDB':
     if len(self.input_pdb) == 4:
       self.input_pdb = Utils.getmmCIF(self,self.input_pdb)
     else:
       #Neither a '.pdb' path nor a 4-character ID.
       self.postprocess(True)
   else:
     #Have to copy into working_dir to limit path length in Phaser
     try:
       shutil.copy(self.input_pdb,os.getcwd())
     except (IOError, OSError, shutil.Error):
       #Only copy failures are tolerated here (e.g. source and
       #destination are the same file); anything else propagates.
       if self.verbose:
         self.logger.debug('Cannot copy input pdb to working dir. Probably already there.')
     self.input_pdb = os.path.join(os.getcwd(),os.path.basename(self.input_pdb))
   #Check if pdb exists...
   if self.input_pdb == False:
     self.postprocess(True)
   else:
     self.dres = Utils.getRes(self)
     self.pdb_info = Utils.getPDBInfo(self,self.input_pdb)
     if self.pdb_info == False:
       self.postprocess(True)
     #If user requests to search for more mols, then allow.
     #Guarded with elif so a False pdb_info is never indexed.
     elif self.nmol:
       if int(self.pdb_info['all'].get('NMol')) < int(self.nmol):
         self.pdb_info['all']['NMol'] = self.nmol
   if self.test:
     self.logger.debug('TEST IS SET "ON"')
   #print out recognition of the program being used
   self.PrintInfo()
Beispiel #3
0
  def processRefine(self,inp):
    """
    Run one macro-cycle of phenix.refine (TLS + rigid body) on the solution
    file '<inp>.1.pdb' against self.datafile.

    Every chain reported by Utils.getPDBInfo (every key except 'all') is added
    to the command as its own TLS group. When self.test is set, the command is
    not run and 'rigid.log' is only touched. Any failure is logged via
    self.logger.exception and not re-raised.

    Per the original author's note: optional step that costs extra time,
    intended for a future ligand finding pipeline.
    """
    if self.verbose:
      self.logger.debug('PDBQuery::processRefine')
    try:
      model = '%s.1.pdb'%inp
      model_info = Utils.getPDBInfo(self,model,False)
      #Fixed part of the command line first, per-chain TLS groups appended after.
      pieces = [
        'phenix.refine %s %s strategy=tls+rigid_body refinement.input.xray_data.labels=IMEAN,SIGIMEAN '%(model,self.datafile),
        'refinement.main.number_of_macro_cycles=1 nproc=2',
      ]
      for chain_id in model_info.keys():
        #'all' is the whole-model entry, not a chain.
        if chain_id == 'all':
          continue
        pieces.append(' refine.adp.tls="chain %s"'%chain_id)
      cmd = ''.join(pieces)
      if self.test == False:
        Utils.processLocal((cmd,'rigid.log'),self.logger)
      else:
        #Test mode: just leave an empty log behind.
        os.system('touch rigid.log')

    except:
      self.logger.exception('**ERROR in PDBQuery.processRefine**')
Beispiel #4
0
  def processPhaser(self):
    """
    Start a Phaser job for every PDB code in self.cell_output.

    For each code: Utils.folders is called for 'Phaser_<code>', the model file
    is symlinked into the current directory if it is not already there, and
    its space group is looked up. Codes for which no space group comes back
    are removed from self.cell_output and skipped. self.laue is then compared
    numerically with the value Utils.subGroups returns for the model to pick
    the number of copies and to decide between one job on the whole model and
    one job per chain.

    Launched jobs are recorded in self.jobs (Process -> job name) and
    self.pids (job name -> second item the job puts on its queue).
    Any exception is logged via self.logger.exception and not re-raised.
    """
    if self.verbose:
      self.logger.debug('PDBQuery::processPhaser')

    def launchJob(inp):
      """Start RunPhaser in a child process and record it in self.jobs/self.pids."""
      queue = Queue()
      job = Process(target=RunPhaser,args=(inp,queue,self.logger))
      job.start()
      #First item on the queue is the log, which is not used here.
      queue.get()
      self.jobs[job] = inp['name']
      #Second item is kept per job name (presumably the job's PID -- confirm in RunPhaser).
      self.pids[inp['name']] = queue.get()

    try:
      #Iterate over a snapshot of the keys: entries are deleted from
      #self.cell_output inside the loop, which raises RuntimeError on
      #Python 3 when iterating the live key view.
      for code in list(self.cell_output.keys()):
        #None means "run on the whole model"; a list means "run per chain".
        chains = None
        copy = 1
        Utils.folders(self,'Phaser_%s'%code)
        f = os.path.basename(self.cell_output[code].get('path'))
        #Check if symlink exists and create if not.
        if not os.path.exists(f):
          os.symlink(self.cell_output[code].get('path'),f)
        #If mmCIF, checks if file exists or if it is super structure with
        #multiple PDB codes, and returns False, otherwise sends back SG.
        sg_pdb = Utils.fixSG(self,Utils.getSGInfo(self,f))
        #Remove codes that won't run or PDB/mmCIF's that don't exist.
        if sg_pdb == False:
          del self.cell_output[code]
          continue
        #**Now check all SG's**
        lg_pdb = Utils.subGroups(self,Utils.convertSG(self,sg_pdb),'simple')
        #SG from data
        sg = Utils.convertSG(self,self.laue,True)
        #Fewer mols in AU or in self.common.
        if code in self.common or float(self.laue) > float(lg_pdb):
          #if SM is lower sym, which will cause problems, since PDB is too big.
          #Need full path for copying pdb files to folders.
          pdb_info = Utils.getPDBInfo(self,os.path.join(os.getcwd(),f))
          #Prune if only one chain present, b/c 'all' and 'A' will be the same.
          if len(pdb_info) == 2:
            #Snapshot the keys so deleting does not break the iteration.
            for key in list(pdb_info.keys()):
              if key != 'all':
                del pdb_info[key]
          copy = pdb_info['all']['NMol']
          if copy == 0:
            copy = 1
          if pdb_info['all']['SC'] < 0.2:
            #Only run on chains that will fit in the AU.
            chains = [chain for chain in pdb_info if pdb_info[chain]['res'] != 0.0]
        #More mols in AU
        elif float(self.laue) < float(lg_pdb):
          pdb_info = Utils.getPDBInfo(self,f,True,True)
          copy = pdb_info['all']['NMol']
        #Same number of mols in AU.
        else:
          pdb_info = Utils.getPDBInfo(self,f,False,True)

        d = {'data':self.datafile,'pdb':f,'name':code,'verbose':self.verbose,'sg':sg,
             'copy':copy,'test':self.test,'cluster':self.cluster_use,'cell analysis':True,
             'large':self.large_cell,'res':Utils.setPhaserRes(self,pdb_info['all']['res']),
            }

        if chains is None:
          launchJob(d)
        else:
          #One job per selected chain, each in its own 'Phaser_<code>_<chain>' folder.
          for chain in chains:
            new_code = '%s_%s'%(code,chain)
            Utils.folders(self,'Phaser_%s'%new_code)
            d.update({'pdb':pdb_info[chain]['file'],'name':new_code,'copy':pdb_info[chain]['NMol'],
                      'res':Utils.setPhaserRes(self,pdb_info[chain]['res'])})
            launchJob(d)

    except:
      #Deliberate catch-all boundary: failures are logged, not propagated.
      self.logger.exception('**ERROR in PDBQuery.processPhaser**')
Beispiel #5
0
    def process_phaser(self):
        """
        Start a Phaser job for every PDB code in self.cell_output.

        For each code: Utils.folders is called for "Phaser_<code>", the model
        file is symlinked into the current directory if it is not already
        there, and its space group is looked up. Codes for which no space
        group comes back are removed from self.cell_output and skipped.
        self.laue is then compared numerically with the value Utils.subGroups
        returns for the model to pick the number of copies and to decide
        between one job on the whole model and one job per chain.

        Launched jobs are recorded in self.jobs (Process -> job name) and
        self.pids (job name -> second item the job puts on its queue).
        Any exception is logged via self.logger.exception and not re-raised.
        """
        if self.verbose:
            self.logger.debug("PDBQuery::process_phaser")

        def launch_job(inp):
            """Start RunPhaser in a child process and record it in self.jobs/self.pids."""
            queue = Queue()
            job = Process(target=RunPhaser, args=(inp, queue, self.logger))
            job.start()
            # First item on the queue is the log, which is not used here.
            queue.get()
            self.jobs[job] = inp["name"]
            # Second item is kept per job name (presumably the job's PID -- confirm in RunPhaser).
            self.pids[inp["name"]] = queue.get()

        try:
            # Iterate over a snapshot of the keys: entries are deleted from
            # self.cell_output inside the loop, which raises RuntimeError on
            # Python 3 when iterating the live key view.
            for code in list(self.cell_output.keys()):
                # None means "run on the whole model"; a list means "run per chain".
                chains = None
                copy = 1
                Utils.folders(self, "Phaser_%s" % code)
                f = os.path.basename(self.cell_output[code].get("path"))
                # Check if symlink exists and create if not.
                if not os.path.exists(f):
                    os.symlink(self.cell_output[code].get("path"), f)
                # If mmCIF, checks if file exists or if it is super structure with
                # multiple PDB codes, and returns False, otherwise sends back SG.
                sg_pdb = Utils.fixSG(self, Utils.getSGInfo(self, f))
                # Remove codes that won't run or PDB/mmCIF's that don't exist.
                if sg_pdb == False:
                    del self.cell_output[code]
                    continue
                # **Now check all SG's**
                lg_pdb = Utils.subGroups(self, Utils.convertSG(self, sg_pdb), "simple")
                # SG from data
                sg = Utils.convertSG(self, self.laue, True)
                # Fewer mols in AU or in self.common.
                if code in self.common or float(self.laue) > float(lg_pdb):
                    # if SM is lower sym, which will cause problems, since PDB is too big.
                    # Need full path for copying pdb files to folders.
                    pdb_info = Utils.getPDBInfo(self, os.path.join(os.getcwd(), f))
                    # Prune if only one chain present, b/c "all" and "A" will be the same.
                    if len(pdb_info) == 2:
                        # Snapshot the keys so deleting does not break the iteration.
                        for key in list(pdb_info.keys()):
                            if key != "all":
                                del pdb_info[key]
                    copy = pdb_info["all"]["NMol"]
                    if copy == 0:
                        copy = 1
                    if pdb_info["all"]["SC"] < 0.2:
                        # Only run on chains that will fit in the AU.
                        chains = [chain for chain in pdb_info if pdb_info[chain]["res"] != 0.0]
                # More mols in AU
                elif float(self.laue) < float(lg_pdb):
                    pdb_info = Utils.getPDBInfo(self, f, True, True)
                    copy = pdb_info["all"]["NMol"]
                # Same number of mols in AU.
                else:
                    pdb_info = Utils.getPDBInfo(self, f, False, True)

                d = {"data":self.datafile, "pdb":f, "name":code, "verbose":self.verbose, "sg":sg,
                     "copy":copy, "test":self.test, "cluster":self.cluster_use, "cell analysis":True,
                     "large":self.large_cell, "res":Utils.setPhaserRes(self, pdb_info["all"]["res"]),
                    }

                if chains is None:
                    launch_job(d)
                else:
                    # One job per selected chain, each in its own "Phaser_<code>_<chain>" folder.
                    for chain in chains:
                        new_code = "%s_%s" % (code, chain)
                        Utils.folders(self, "Phaser_%s" % new_code)
                        d.update({"pdb":pdb_info[chain]["file"], "name":new_code, "copy":pdb_info[chain]["NMol"],
                                  "res":Utils.setPhaserRes(self, pdb_info[chain]["res"])})
                        launch_job(d)

        except:
            # Deliberate catch-all boundary: failures are logged, not propagated.
            self.logger.exception("**ERROR in PDBQuery.process_phaser**")