Example #1
 def preprocess(self):
     """
     Things to do before the main process runs
     1. Change to the correct directory
     2. Print out the reference for Stat pipeline
     """
     if self.verbose:
         self.logger.debug("LabelitPP::preprocess")
     Utils.folders(self)
     #print out recognition of the program being used
     self.print_info()
Example #2
 def launchJob(inp2,k):
   """
   Launch Phaser job on cluster and pass back the process job and pid.
   """
   Utils.folders(self,k)
   #Remove leftover file if rerunning.
   if os.path.exists('adf.com'):
     os.system('rm -rf adf.com')
   queue = Queue()
   j = Process(target=RunPhaser,args=(inp2,queue,self.logger))
   j.start()
   queue.get()#For the log I am not using
   if self.output['jobs'] == None:
     self.output['jobs'] = {j:k}
     self.output['pids'] = {k:queue.get()}
   else:
     self.output['jobs'].update({j:k})
     self.output['pids'].update({k:queue.get()})
   #Setup initial results for all running jobs.
   self.phaser_results[k] = { 'AutoMR results' : Parse.setPhaserFailed('Still running')}
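
A note on the pattern in Examples #2 and #3: the worker pushes two items onto a multiprocessing Queue (a log entry, then a process id), and the parent pops them back in the same order, discarding the first. Below is a minimal, self-contained sketch of that handoff; the worker here is a stand-in for RunPhaser, not the RAPD implementation.

import os
from multiprocessing import Process, Queue

def worker(inp, queue):
    """Stand-in for RunPhaser: emit a log entry, then the worker's pid."""
    queue.put("log for %s" % inp)  # first get() in the parent
    queue.put(os.getpid())         # second get(): pid kept for later cleanup

if __name__ == '__main__':
    queue = Queue()
    job = Process(target=worker, args=('beta_blip', queue))
    job.start()
    queue.get()        # the log, unused here just as in the examples above
    pid = queue.get()  # the pid, stored so the job can be killed on timeout
    job.join()
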
Example #3
   def launchJob(inp2, k):
       """
        Launch Phaser job on cluster and pass back the process job and pid.
        """
       Utils.folders(self, k)
       #Remove leftover file if rerunning.
       if os.path.exists('adf.com'):
           os.system('rm -rf adf.com')
       queue = Queue()
       j = Process(target=RunPhaser, args=(inp2, queue, self.logger))
       j.start()
       queue.get()  #For the log I am not using
       if self.output['jobs'] == None:
           self.output['jobs'] = {j: k}
           self.output['pids'] = {k: queue.get()}
       else:
           self.output['jobs'].update({j: k})
           self.output['pids'].update({k: queue.get()})
       #Setup initial results for all running jobs.
       self.phaser_results[k] = {
           'AutoMR results': Parse.setPhaserFailed('Still running')
       }
Example #4
  def Queue(self):
    """
    queue system.
    """
    if self.verbose:
      self.logger.debug('AutoMolRep::Queue')
    try:
      timed_out = False
      timer = 0
      jobs = self.output['jobs'].keys()
      #set which jobs to watch.
      if self.run_before:
        jobs = [job for job in jobs if self.output['jobs'][job][-1] == '1']
      else:
        jobs = [job for job in jobs if self.output['jobs'][job][-1] == '0']
      counter = len(jobs)
      while counter != 0:
        for job in jobs:
          if job.is_alive() == False:
            jobs.remove(job)
            if self.verbose:
              self.logger.debug('Finished Phaser on %s'%self.output['jobs'][job])
            Utils.folders(self,self.output['jobs'][job])
            if self.adf:
              if os.path.exists('adf.com'):
                del self.output['pids'][self.output['jobs'][job]]
                counter -= 1
              else:
                key = self.output['jobs'].pop(job)
                p = self.postprocessPhaser(key)
                if p == 'ADF':
                  #Calculate ADF map.
                  adf = Process(target=Utils.calcADF,name='ADF%s'%key,args=(self,key))
                  adf.start()
                  jobs.append(adf)
                  self.output['jobs'][adf] = key
                else:
                  counter -= 1
                self.postprocess(False)
            else:
              self.postprocessPhaser(self.output['jobs'][job])
              del self.output['pids'][self.output['jobs'][job]]
              self.postprocess(False)
              counter -= 1
        time.sleep(0.2)
        timer += 0.2
        if self.verbose:
          number = round(timer%1,1)
          if number in (0.0,1.0):
            print 'Waiting for Phaser to finish %s seconds'%timer
        if self.phaser_timer:
          if timer >= self.phaser_timer:
            timed_out = True
            break
      if timed_out:
        self.logger.debug('Phaser timed out.')
        print 'Phaser timed out.'
        for job in jobs:
          self.phaser_results[self.output['jobs'][job]] = {'AutoMR results':Parse.setPhaserFailed('Timed out')}
          if self.cluster_use:
            #Utils.killChildrenCluster(self,self.output['pids'][self.output['jobs'][job]])
            BLspec.killChildrenCluster(self,self.output['pids'][self.output['jobs'][job]])
          else:
            Utils.killChildren(self,self.output['pids'][self.output['jobs'][job]])
      #Check if solution has been found.
      if self.run_before == False:
        self.checkSolution()
      self.logger.debug('Phaser finished.')

    except:
      self.logger.exception('**ERROR in AutoMolRep.Queue**')
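
The Queue method above (and its variants in Examples #5, #8, #10, and #12) is a polling watchdog: check is_alive() on every tracked Process, reap the finished ones, sleep 0.2 s, and bail out when an optional timer expires. One caveat in the originals is that they call jobs.remove(job) while iterating over jobs, which can skip the following element on that pass; iterating over a copy avoids it. A reduced sketch of the loop, with a placeholder worker standing in for Phaser:

import time
from multiprocessing import Process

def fake_phaser(seconds):
    """Placeholder worker standing in for a Phaser run."""
    time.sleep(seconds)

if __name__ == '__main__':
    jobs = {}
    for seconds in (1, 2):
        job = Process(target=fake_phaser, args=(seconds,))
        job.start()
        jobs[job] = 'job_%d' % seconds

    timer, timeout, timed_out = 0.0, 30.0, False
    pending = list(jobs)
    while pending:
        for job in pending[:]:      # iterate over a copy while removing
            if not job.is_alive():
                pending.remove(job)
                print('Finished %s' % jobs[job])
        time.sleep(0.2)
        timer += 0.2
        if timeout and timer >= timeout:
            timed_out = True
            break
    if timed_out:
        for job in pending:         # the examples kill via the stored pids
            job.terminate()
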
Example #5
    def run_queue(self):
        """
        queue system.
        """

        self.logger.debug("PDBQuery::run_queue")

        try:
            timed_out = False
            timer = 0
            if self.jobs != {}:
                jobs = self.jobs.keys()
                while len(jobs) != 0:
                    for job in jobs:
                        if job.is_alive() == False:
                            jobs.remove(job)
                            code = self.jobs.pop(job)
                            xutils.folders(self, "Phaser_%s" % code)
                            new_jobs = []
                            if self.test == False:
                                del self.pids[code]
                            #if self.verbose:
                            self.logger.debug("Finished Phaser on %s" % code)
                            p = self.postprocess_phaser(code)
                            # if p.count("rigid"):
                            #     if os.path.exists("rigid.log") == False:
                            #         j = Process(target=self.process_refine, args=(code, ))
                            #         j.start()
                            #         new_jobs.append(j)
                            #         if self.test:
                            #             time.sleep(5)
                            # if p.count("ADF"):
                            #     if os.path.exists("adf.com") == False:
                            #         j = Process(target=xutils.calcADF, args=(self, code))
                            #         j.start()
                            #         new_jobs.append(j)
                            # if len(new_jobs) > 0:
                            #     for j1 in new_jobs:
                            #         self.jobs[j1] = code
                            #         jobs.append(j1)
                    time.sleep(0.2)
                    timer += 0.2
                    if self.phaser_timer:
                        if timer >= self.phaser_timer:
                            timed_out = True
                            break
                if timed_out:
                    for j in self.jobs.values():
                        if self.pids.has_key(j):
                            if self.cluster_use:
                                # TODO
                                # BLspec.killChildrenCluster(self,self.pids[j])
                                pass
                            else:
                                xutils.killChildren(self, self.pids[j])
                        if self.phaser_results.has_key(j) == False:
                            self.phaser_results[j] = {"AutoMR results": Parse.setPhaserFailed("Timed out")}
                    if self.verbose:
                        self.logger.debug("PDBQuery timed out.")
                        print "PDBQuery timed out."
            if self.verbose:
                self.logger.debug("PDBQuery.run_queue finished.")

        except:
            self.logger.exception("**ERROR in PDBQuery.run_queue**")
Example #6
    def process_phaser(self):
        """Start Phaser for input pdb"""

        self.logger.debug("process_phaser")
        self.tprint("\nStarting molecular replacement", level=30, color="blue")

        # POOL = Pool(processes=4)
        #
        # def launch_job(inp):
        #     """Run a phaser process and retrieve results"""
        #
        #     print "launch_job", inp
        #
        #     queue = Queue()
        #     result = POOL.apply_async(phaser_func, (inp, queue, self.logger))
        #
        #     # queue = Queue()
        #     # job = Process(target=RunPhaser, args=(inp, queue, self.logger))
        #     # job.start()
        #     # # Get results
        #     # queue.get()  # For the log I don"t use
        #     # self.jobs[job] = inp["name"]
        #     # self.pids[inp["name"]] = queue.get()

        # Run through the pdbs
        self.tprint("  Assembling Phaser runs", level=10, color="white")
        commands = []
        for code in self.cell_output.keys():

            self.tprint("    %s" % code, level=30, color="white")

            l = False
            copy = 1

            # Create directory for MR
            xutils.create_folders(self.working_dir, "Phaser_%s" % code)

            # The cif file name
            cif_file = os.path.basename(self.cell_output[code].get("path"))
            # print "cif_file", cif_file
            gzip_file = cif_file+".gz"
            # print "gzip_file", gzip_file
            cached_file = False

            # Is the cif file in the local cache?
            if self.cif_cache:
                cached_file = os.path.join(self.cif_cache, gzip_file)
                # print "cached_file", cached_file
                if os.path.exists(cached_file):
                    self.tprint("      Have cached cif file %s" % gzip_file, level=10, color="white")

                else:
                    # Get the gzipped cif file from the PDBQ server
                    self.tprint("      Fetching %s" % cif_file, level=10, color="white")
                    try:
                        response = urllib2.urlopen(urllib2.Request(\
                                   "%s/entry/get_cif/%s" % \
                                   (PDBQ_SERVER, cif_file.replace(".cif", "")))\
                                   , timeout=60).read()
                    except urllib2.HTTPError as http_error:
                        self.tprint("      %s when fetching %s" % (http_error, cif_file),
                                    level=50,
                                    color="red")
                        continue

                    # Write the  gzip file
                    with open(cached_file, "wb") as outfile:
                        outfile.write(response)

                # Copy the gzip file to the cwd
                # print "Copying %s to %s" % (cached_file, os.path.join(os.getcwd(), gzip_file))
                shutil.copy(cached_file, os.path.join(os.getcwd(), gzip_file))

            # No local CIF file cache
            else:
                # Get the gzipped cif file from the PDBQ server
                self.tprint("      Fetching %s" % cif_file, level=10, color="white")
                try:
                    response = urllib2.urlopen(urllib2.Request(\
                               "%s/entry/get_cif/%s" % \
                               (PDBQ_SERVER, cif_file.replace(".cif", ""))), \
                               timeout=60).read()
                except urllib2.HTTPError as http_error:
                    self.tprint("      %s when fetching %s" % (http_error, cif_file),
                                level=50,
                                color="red")
                    continue

                # Write the  gzip file
                with open(gzip_file, "wb") as outfile:
                    outfile.write(response)

            # Uncompress the gzipped file
            unzip_proc = subprocess.Popen(["gunzip", gzip_file])
            unzip_proc.wait()

            # If mmCIF, checks if file exists or if it is super structure with
            # multiple PDB codes, and returns False, otherwise sends back SG.
            sg_pdb = xutils.fix_spacegroup(xutils.get_spacegroup_info(cif_file))

            # Remove codes that won't run or PDB/mmCIF's that don't exist.
            if sg_pdb == False:
                del self.cell_output[code]
                continue

            # Convert from cif to pdb
            conversion_proc = subprocess.Popen(["phenix.cif_as_pdb", cif_file],
                                               stdout=subprocess.PIPE,
                                               stderr=subprocess.PIPE)
            conversion_proc.wait()
            cif_file = cif_file.replace(".cif", ".pdb")

            # Now check all SG's
            sg_num = xutils.convert_spacegroup(sg_pdb)
            lg_pdb = xutils.get_sub_groups(sg_num, "simple")
            self.tprint("      %s spacegroup: %s (%s)" % (cif_file, sg_pdb, sg_num),
                        level=10,
                        color="white")
            self.tprint("    subgroups: %s" % str(lg_pdb), level=10, color="white")

            # SG from data
            data_spacegroup = xutils.convert_spacegroup(self.laue, True)
            # self.tprint("      Data spacegroup: %s" % data_spacegroup, level=10, color="white")

            # Fewer mols in AU or in self.common.
            if code in self.common or float(self.laue) > float(lg_pdb):
                # if SM is lower sym, which will cause problems, since PDB is too big.
                # Need full path for copying pdb files to folders.
                pdb_info = xutils.get_pdb_info(os.path.join(os.getcwd(), cif_file),
                                               dres=self.dres,
                                               matthews=True,
                                               cell_analysis=False,
                                               data_file=self.datafile)
                #Prune if only one chain present, b/c "all" and "A" will be the same.
                if len(pdb_info.keys()) == 2:
                    for key in pdb_info.keys():
                        if key != "all":
                            del pdb_info[key]
                copy = pdb_info["all"]["NMol"]
                if copy == 0:
                    copy = 1
                # If pdb_info["all"]["res"] == 0.0:
                if pdb_info["all"]["SC"] < 0.2:
                    # Only run on chains that will fit in the AU.
                    l = [chain for chain in pdb_info.keys() if pdb_info[chain]["res"] != 0.0]

            # More mols in AU
            elif float(self.laue) < float(lg_pdb):
                pdb_info = xutils.get_pdb_info(cif_file=cif_file,
                                               dres=self.dres,
                                               matthews=True,
                                               cell_analysis=True,
                                               data_file=self.datafile)
                copy = pdb_info["all"]["NMol"]

            # Same number of mols in AU.
            else:
                pdb_info = xutils.get_pdb_info(cif_file=cif_file,
                                               dres=self.dres,
                                               matthews=False,
                                               cell_analysis=True,
                                               data_file=self.datafile)

            job_description = {
                "work_dir": os.path.abspath(os.path.join(self.working_dir, "Phaser_%s" % code)),
                "data": self.datafile,
                "pdb": cif_file,
                "name": code,
                "verbose": self.verbose,
                "sg": data_spacegroup,
                "copy": copy,
                "test": self.test,
                "cluster": self.cluster_use,
                "cell analysis": True,
                "large": self.large_cell,
                "res": xutils.set_phaser_res(pdb_info["all"]["res"], self.large_cell, self.dres),
                "timeout": self.phaser_timer}

            if not l:
                commands.append(job_description)
            else:
                # Use a fresh copy for each chain: updating and re-appending
                # the same dict would leave every queued entry holding the
                # last chain's values.
                for chain in l:
                    new_code = "%s_%s" % (code, chain)
                    xutils.folders(self, "Phaser_%s" % new_code)
                    chain_description = dict(job_description)
                    chain_description.update({
                        "work_dir": os.path.abspath(os.path.join(self.working_dir, "Phaser_%s" % \
                            new_code)),
                        "pdb": pdb_info[chain]["file"],
                        "name": new_code,
                        "copy": pdb_info[chain]["NMol"],
                        "res": xutils.set_phaser_res(pdb_info[chain]["res"],
                                                     self.large_cell,
                                                     self.dres)})

                    commands.append(chain_description)

        # pprint(commands)
        # phaser_results = []
        # for command in commands:
        #     phaser_results.append(phaser_func(command))

        # Run in pool
        pool = Pool(2)
        self.tprint("    Initiating Phaser runs", level=10, color="white")
        results = pool.map_async(phaser_func, commands)
        pool.close()
        pool.join()
        phaser_results = results.get()

        return phaser_results
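
Example #6 fetches each gzipped cif from the PDBQ server only when it is not already in a local cache directory, then gunzips it in the working directory. Below is a reduced sketch of that cache-or-fetch step, using Python 2's urllib2 as the example does; the PDBQ_SERVER value is an assumption (its definition is not shown), and the HTTPError handling from the example is trimmed for brevity.

import os
import shutil
import subprocess
import urllib2

PDBQ_SERVER = "http://localhost"  # assumption: the real address is not shown

def fetch_cif(code, cache_dir=None):
    """Return the name of a local, uncompressed cif file for a PDB code."""
    cif_file = code.lower() + ".cif"
    gzip_file = cif_file + ".gz"
    cached = os.path.join(cache_dir, gzip_file) if cache_dir else None
    if cached and os.path.exists(cached):
        shutil.copy(cached, gzip_file)      # cache hit: reuse the stored .gz
    else:
        url = "%s/entry/get_cif/%s" % (PDBQ_SERVER, code.lower())
        response = urllib2.urlopen(urllib2.Request(url), timeout=60).read()
        with open(gzip_file, "wb") as outfile:
            outfile.write(response)
        if cached:
            shutil.copy(gzip_file, cached)  # populate the cache for next time
    subprocess.Popen(["gunzip", "-f", gzip_file]).wait()
    return cif_file
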
Example #7
    def process_phaser(self):
        """Start Phaser for input pdb"""

        self.logger.debug("process_phaser")
        self.tprint("\nStarting molecular replacement", level=30, color="blue")

        self.tprint("  Assembling Phaser runs", level=10, color="white")

        def launch_job(inp):
            """Launch the Phaser job"""
            #self.logger.debug("process_phaser Launching %s"%inp['name'])
            tag = 'Phaser_%d' % random.randint(0, 10000)
            if self.computer_cluster:
                # Create a unique identifier for Phaser results
                inp['tag'] = tag
                # Send Redis settings so results can be sent thru redis
                #inp['db_settings'] = self.site.CONTROL_DATABASE_SETTINGS
                # Don't need result queue since results will be sent via Redis
                queue = False
            else:
                inp['pool'] = self.pool
                # Add result queue
                queue = self.manager.Queue()
                inp['result_queue'] = queue

            #if self.pool:
            #    inp['pool'] = self.pool
            #else:
            #    inp['tag'] = tag
            #job, pid, tag = run_phaser(**inp)
            job, pid = run_phaser(**inp)
            self.jobs[job] = {
                'name': inp['name'],
                'pid': pid,
                'tag': tag,
                'result_queue': queue,
                'spacegroup': inp['spacegroup']  # Need for jobs that timeout.
            }

        # Run through the pdbs
        for pdb_code in self.cell_output.keys():

            self.tprint("    %s" % pdb_code, level=30, color="white")

            l = False
            copy = 1

            # Create directory for MR
            xutils.create_folders(self.working_dir, "Phaser_%s" % pdb_code)
            cif_file = pdb_code.lower() + ".cif"

            # Get the structure file
            if self.test and os.path.exists(cif_file):
                cif_path = os.path.join(os.getcwd(), cif_file)
            else:
                cif_path = self.repository.download_cif(
                    pdb_code, os.path.join(os.getcwd(), cif_file))

            if not cif_path:
                self.postprocess_invalid_code(pdb_code)
            else:
                # If mmCIF, checks if file exists or if it is super structure with
                # multiple PDB codes, and returns False, otherwise sends back SG.
                spacegroup_pdb = xutils.fix_spacegroup(
                    get_spacegroup_info(cif_path))
                if not spacegroup_pdb:
                    del self.cell_output[pdb_code]
                    continue

                # Now check all SG's
                spacegroup_num = xutils.convert_spacegroup(spacegroup_pdb)
                lg_pdb = xutils.get_sub_groups(spacegroup_num, "laue")
                self.tprint("      %s spacegroup: %s (%s)" %
                            (cif_path, spacegroup_pdb, spacegroup_num),
                            level=10,
                            color="white")
                self.tprint("      subgroups: %s" % str(lg_pdb),
                            level=10,
                            color="white")

                # SG from data
                data_spacegroup = xutils.convert_spacegroup(self.laue, True)
                # self.tprint("      Data spacegroup: %s" % data_spacegroup, level=10, color="white")

                # Fewer mols in AU or in common_contaminants.
                if pdb_code in self.common_contaminants or float(
                        self.laue) > float(lg_pdb):
                    # if SM is lower sym, which will cause problems, since PDB is too big.
                    pdb_info = get_pdb_info(struct_file=cif_path,
                                            data_file=self.data_file,
                                            dres=self.dres,
                                            matthews=True,
                                            chains=True)
                    # Prune if only one chain present, b/c "all" and "A" will be the same.
                    if len(pdb_info.keys()) == 2:
                        for key in pdb_info.keys():
                            if key != "all":
                                del pdb_info[key]
                    copy = pdb_info["all"]["NMol"]
                    if copy == 0:
                        copy = 1
                    # If pdb_info["all"]["res"] == 0.0:
                    if pdb_info["all"]["SC"] < 0.2:
                        # Only run on chains that will fit in the AU.
                        l = [
                            chain for chain in pdb_info.keys()
                            if pdb_info[chain]["res"] != 0.0
                        ]

                # More mols in AU
                elif float(self.laue) < float(lg_pdb):
                    pdb_info = get_pdb_info(struct_file=cif_path,
                                            data_file=self.data_file,
                                            dres=self.dres,
                                            matthews=True,
                                            chains=False)
                    copy = pdb_info["all"]["NMol"]

                # Same number of mols in AU.
                else:
                    pdb_info = get_pdb_info(struct_file=cif_path,
                                            data_file=self.data_file,
                                            dres=self.dres,
                                            matthews=False,
                                            chains=False)

                job_description = {
                    "work_dir": os.path.abspath(
                        os.path.join(self.working_dir, "Phaser_%s" % pdb_code)),
                    "data_file": self.data_file,
                    "struct_file": cif_path,
                    "name": pdb_code,
                    "spacegroup": data_spacegroup,
                    "ncopy": copy,
                    #"test": self.test,
                    "cell_analysis": True,
                    #"large_cell": self.large_cell,
                    "resolution": xutils.set_phaser_res(
                        pdb_info["all"]["res"], self.large_cell, self.dres),
                    "launcher": self.launcher,
                    "db_settings": self.db_settings,
                    "tag": False,
                    "batch_queue": self.batch_queue,
                    "rapd_python": self.rapd_python
                }

                if not l:
                    launch_job(job_description)
                else:
                    for chain in l:
                        new_code = "%s_%s" % (pdb_code, chain)
                        xutils.folders(self, "Phaser_%s" % new_code)
                        job_description.update({
                            "work_dir": os.path.abspath(os.path.join(self.working_dir, "Phaser_%s" % \
                                new_code)),
                            "struct_file": pdb_info[chain]["file"],
                            "name":new_code,
                            "ncopy":pdb_info[chain]["NMol"],
                            "resolution":xutils.set_phaser_res(pdb_info[chain]["res"],
                                                        self.large_cell,
                                                        self.dres)})
                        launch_job(job_description)
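
Example #7's nested launch_job chooses a result channel by destination: cluster jobs get a random tag and report back out-of-band (the commented lines suggest Redis), while local jobs get a Manager queue. A minimal sketch of that bookkeeping follows; run_phaser here is a hypothetical stand-in, since the real launcher is not part of this example, and the pool wiring is omitted.

import random
from multiprocessing import Manager

def run_phaser(**inp):
    """Hypothetical stand-in: would launch the job and return (handle, pid)."""
    return object(), 12345

def launch_job(inp, jobs, computer_cluster, manager):
    tag = 'Phaser_%d' % random.randint(0, 10000)
    if computer_cluster:
        inp['tag'] = tag           # results come back out-of-band, keyed by tag
        queue = False
    else:
        queue = manager.Queue()    # results come back through this queue
        inp['result_queue'] = queue
    job, pid = run_phaser(**inp)
    jobs[job] = {'name': inp['name'], 'pid': pid, 'tag': tag,
                 'result_queue': queue, 'spacegroup': inp['spacegroup']}

if __name__ == '__main__':
    jobs = {}
    launch_job({'name': '1abc', 'spacegroup': 'P212121'},
               jobs, computer_cluster=False, manager=Manager())
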
Example #8
    def Queue(self):
        """
        queue system.
        """
        if self.verbose:
            self.logger.debug('AutoMolRep::Queue')
        try:
            timed_out = False
            timer = 0
            jobs = self.output['jobs'].keys()
            #set which jobs to watch.
            if self.run_before:
                jobs = [
                    job for job in jobs if self.output['jobs'][job][-1] == '1'
                ]
            else:
                jobs = [
                    job for job in jobs if self.output['jobs'][job][-1] == '0'
                ]
            counter = len(jobs)
            while counter != 0:
                for job in jobs:
                    if job.is_alive() == False:
                        jobs.remove(job)
                        if self.verbose:
                            self.logger.debug('Finished Phaser on %s' %
                                              self.output['jobs'][job])
                        Utils.folders(self, self.output['jobs'][job])
                        if self.adf:
                            if os.path.exists('adf.com'):
                                del self.output['pids'][self.output['jobs'][job]]
                                counter -= 1
                            else:
                                key = self.output['jobs'].pop(job)
                                p = self.postprocessPhaser(key)
                                if p == 'ADF':
                                    #Calculate ADF map.
                                    adf = Process(target=Utils.calcADF,
                                                  name='ADF%s' % key,
                                                  args=(self, key))
                                    adf.start()
                                    jobs.append(adf)
                                    self.output['jobs'][adf] = key
                                else:
                                    counter -= 1
                                self.postprocess(False)
                        else:
                            self.postprocessPhaser(self.output['jobs'][job])
                            del self.output['pids'][self.output['jobs'][job]]
                            self.postprocess(False)
                            counter -= 1
                time.sleep(0.2)
                timer += 0.2
                if self.verbose:
                    number = round(timer % 1, 1)
                    if number in (0.0, 1.0):
                        print 'Waiting for Phaser to finish %s seconds' % timer
                if self.phaser_timer:
                    if timer >= self.phaser_timer:
                        timed_out = True
                        break
            if timed_out:
                self.logger.debug('Phaser timed out.')
                print 'Phaser timed out.'
                for job in jobs:
                    self.phaser_results[self.output['jobs'][job]] = {
                        'AutoMR results': Parse.setPhaserFailed('Timed out')
                    }
                    if self.cluster_use:
                        #Utils.killChildrenCluster(self,self.output['pids'][self.output['jobs'][job]])
                        BLspec.killChildrenCluster(
                            self,
                            self.output['pids'][self.output['jobs'][job]])
                    else:
                        Utils.killChildren(
                            self,
                            self.output['pids'][self.output['jobs'][job]])
            #Check if solution has been found.
            if self.run_before == False:
                self.checkSolution()
            self.logger.debug('Phaser finished.')

        except:
            self.logger.exception('**ERROR in AutoMolRep.Queue**')
Example #9
    def process_phaser(self):
        """Start Phaser for input pdb"""

        self.logger.debug("process_phaser")
        self.tprint("\nStarting molecular replacement", level=30, color="blue")

        self.tprint("  Assembling Phaser runs", level=10, color="white")

        def launch_job(inp):
            """Launch the Phaser job"""
            #self.logger.debug("process_phaser Launching %s"%inp['name'])
            if self.pool:
                inp['pool'] = self.pool
            job, pid, output_id = run_phaser(**inp)
            self.jobs[job] = {'name': inp['name'],
                              'pid' : pid,
                              'output_id' : output_id}

        # Run through the pdbs
        for pdb_code in self.cell_output.keys():

            self.tprint("    %s" % pdb_code, level=30, color="white")

            l = False
            copy = 1

            # Create directory for MR
            xutils.create_folders(self.working_dir, "Phaser_%s" % pdb_code)
            cif_file = pdb_code.lower() + ".cif"
            
            # Get the structure file
            if self.test and os.path.exists(cif_file):
                cif_path = os.path.join(os.getcwd(), cif_file)
            else:
                cif_path = self.repository.download_cif(pdb_code, os.path.join(os.getcwd(), cif_file))
            if not cif_path:
                self.postprocess_invalid_code(pdb_code)
            else:
                # If mmCIF, checks if file exists or if it is super structure with
                # multiple PDB codes, and returns False, otherwise sends back SG.
                spacegroup_pdb = xutils.fix_spacegroup(get_spacegroup_info(cif_path))
                if not spacegroup_pdb:
                    del self.cell_output[pdb_code]
                    continue

                # Now check all SG's
                spacegroup_num = xutils.convert_spacegroup(spacegroup_pdb)
                lg_pdb = xutils.get_sub_groups(spacegroup_num, "laue")
                self.tprint("      %s spacegroup: %s (%s)" % (cif_path, spacegroup_pdb, spacegroup_num),
                            level=10,
                            color="white")
                self.tprint("      subgroups: %s" % str(lg_pdb), level=10, color="white")
    
                # SG from data
                data_spacegroup = xutils.convert_spacegroup(self.laue, True)
                # self.tprint("      Data spacegroup: %s" % data_spacegroup, level=10, color="white")
    
                # Fewer mols in AU or in common_contaminants.
                if pdb_code in self.common_contaminants or float(self.laue) > float(lg_pdb):
                    # if SM is lower sym, which will cause problems, since PDB is too big.
                    pdb_info = get_pdb_info(cif_file=cif_path,
                                            data_file=self.datafile,
                                            dres=self.dres,
                                            matthews=True,
                                            chains=True)
                    # Prune if only one chain present, b/c "all" and "A" will be the same.
                    if len(pdb_info.keys()) == 2:
                        for key in pdb_info.keys():
                            if key != "all":
                                del pdb_info[key]
                    copy = pdb_info["all"]["NMol"]
                    if copy == 0:
                        copy = 1
                    # If pdb_info["all"]["res"] == 0.0:
                    if pdb_info["all"]["SC"] < 0.2:
                        # Only run on chains that will fit in the AU.
                        l = [chain for chain in pdb_info.keys() if pdb_info[chain]["res"] != 0.0]
    
                # More mols in AU
                elif float(self.laue) < float(lg_pdb):
                    pdb_info = get_pdb_info(cif_file=cif_path,
                                            data_file=self.datafile,
                                            dres=self.dres,
                                            matthews=True,
                                            chains=False)
                    copy = pdb_info["all"]["NMol"]
    
                # Same number of mols in AU.
                else:
                    pdb_info = get_pdb_info(cif_file=cif_path,
                                            data_file=self.datafile,
                                            dres=self.dres,
                                            matthews=False,
                                            chains=False)
    
                job_description = {
                    "work_dir": os.path.abspath(os.path.join(self.working_dir, "Phaser_%s" % pdb_code)),
                    "datafile": self.datafile,
                    "cif": cif_path,
                    #"pdb": cif_path,
                    "name": pdb_code,
                    "spacegroup": data_spacegroup,
                    "ncopy": copy,
                    "test": self.test,
                    "cell_analysis": True,
                    "large_cell": self.large_cell,
                    "resolution": xutils.set_phaser_res(pdb_info["all"]["res"],
                                                 self.large_cell,
                                                 self.dres),
                    "launcher": self.launcher,
                    "db_settings": self.db_settings,
                    "output_id": False,
                    "batch_queue": self.batch_queue}
    
                if not l:
                    launch_job(job_description)
                else:
                    for chain in l:
                        new_code = "%s_%s" % (pdb_code, chain)
                        xutils.folders(self, "Phaser_%s" % new_code)
                        job_description.update({
                            "work_dir": os.path.abspath(os.path.join(self.working_dir, "Phaser_%s" % \
                                new_code)),
                            "cif":pdb_info[chain]["file"],
                            #"pdb":pdb_info[chain]["file"],
                            "name":new_code,
                            "ncopy":pdb_info[chain]["NMol"],
                            "resolution":xutils.set_phaser_res(pdb_info[chain]["res"],
                                                        self.large_cell,
                                                        self.dres)})
                        launch_job(job_description)
Example #10
  def Queue(self):
    """
    queue system.
    """
    if self.verbose:
      self.logger.debug('PDBQuery::Queue')
    try:
      timed_out = False
      timer = 0
      if self.jobs != {}:
        jobs = self.jobs.keys()
        while len(jobs) != 0:
          for job in jobs:
            if job.is_alive() == False:
              jobs.remove(job)
              code = self.jobs.pop(job)
              Utils.folders(self,'Phaser_%s'%code)
              new_jobs = []
              if self.test == False:
                del self.pids[code]
              #if self.verbose:
              self.logger.debug('Finished Phaser on %s'%code)
              p = self.postprocessPhaser(code)
              if p.count('rigid'):
                if os.path.exists('rigid.log') == False:
                  j = Process(target=self.processRefine,args=(code,))
                  j.start()
                  new_jobs.append(j)
                  if self.test:
                    time.sleep(5)
              if p.count('ADF'):
                if os.path.exists('adf.com') == False:
                  j = Process(target=Utils.calcADF,args=(self,code))
                  j.start()
                  new_jobs.append(j)
              if len(new_jobs) > 0:
                for j1 in new_jobs:
                  self.jobs[j1] = code
                  jobs.append(j1)
          time.sleep(0.2)
          timer += 0.2
          if self.phaser_timer:
            if timer >= self.phaser_timer:
              timed_out = True
              break
        if timed_out:
          for j in self.jobs.values():
            if self.pids.has_key(j):
              if self.cluster_use:
                BLspec.killChildrenCluster(self,self.pids[j])
              else:
                Utils.killChildren(self,self.pids[j])
            if self.phaser_results.has_key(j) == False:
              self.phaser_results[j] = {'AutoMR results': Parse.setPhaserFailed('Timed out')}
          if self.verbose:
            self.logger.debug('PDBQuery timed out.')
            print 'PDBQuery timed out.'
      if self.verbose:
        self.logger.debug('PDBQuery.Queue finished.')

    except:
      self.logger.exception('**ERROR in PDBQuery.Queue**')
Example #11
  def processPhaser(self):
    """
    Start Phaser for input pdb.
    """
    if self.verbose:
      self.logger.debug('PDBQuery::processPhaser')

    def launchJob(inp):
      queue = Queue()
      job = Process(target=RunPhaser,args=(inp,queue,self.logger))
      job.start()
      queue.get()#For the log I don't use
      self.jobs[job] = inp['name']
      self.pids[inp['name']] = queue.get()

    try:
      for code in self.cell_output.keys():
      #for code in ['4ER2']:
        l = False
        copy = 1
        Utils.folders(self,'Phaser_%s'%code)
        f = os.path.basename(self.cell_output[code].get('path'))
        #Check if symlink exists and create if not.
        if os.path.exists(f) == False:
          os.symlink(self.cell_output[code].get('path'),f)
        #If mmCIF, checks if file exists or if it is super structure with
        #multiple PDB codes, and returns False, otherwise sends back SG.
        sg_pdb = Utils.fixSG(self,Utils.getSGInfo(self,f))
        #Remove codes that won't run or PDB/mmCIF's that don't exist.
        if sg_pdb == False:
          del self.cell_output[code]
          continue
        #**Now check all SG's**
        lg_pdb = Utils.subGroups(self,Utils.convertSG(self,sg_pdb),'simple')
        #SG from data
        sg = Utils.convertSG(self,self.laue,True)
        #Fewer mols in AU or in self.common.
        if code in self.common or float(self.laue) > float(lg_pdb):
          #if SM is lower sym, which will cause problems, since PDB is too big.
          #Need full path for copying pdb files to folders.
          pdb_info = Utils.getPDBInfo(self,os.path.join(os.getcwd(),f))
          #Prune if only one chain present, b/c 'all' and 'A' will be the same.
          if len(pdb_info.keys()) == 2:
            for key in pdb_info.keys():
              if key != 'all':
                del pdb_info[key]
          copy = pdb_info['all']['NMol']
          if copy == 0:
            copy = 1
          #if pdb_info['all']['res'] == 0.0:
          if pdb_info['all']['SC'] < 0.2:
            #Only run on chains that will fit in the AU.
            l = [chain for chain in pdb_info.keys() if pdb_info[chain]['res'] != 0.0]
        #More mols in AU
        elif float(self.laue) < float(lg_pdb):
          pdb_info = Utils.getPDBInfo(self,f,True,True)
          copy = pdb_info['all']['NMol']
        #Same number of mols in AU.
        else:
          pdb_info = Utils.getPDBInfo(self,f,False,True)

        d = {'data':self.datafile,'pdb':f,'name':code,'verbose':self.verbose,'sg':sg,
             'copy':copy,'test':self.test,'cluster':self.cluster_use,'cell analysis':True,
             'large':self.large_cell,'res':Utils.setPhaserRes(self,pdb_info['all']['res']),
            }

        if l == False:
          launchJob(d)
        else:
          d1 = {}
          for chain in l:
            new_code = '%s_%s'%(code,chain)
            Utils.folders(self,'Phaser_%s'%new_code)
            d.update({'pdb':pdb_info[chain]['file'],'name':new_code,'copy':pdb_info[chain]['NMol'],
                      'res':Utils.setPhaserRes(self,pdb_info[chain]['res'])})
            launchJob(d)

    except:
      self.logger.exception('**ERROR in PDBQuery.processPhaser**')
Example #12
    def run_queue(self):
        """
        queue system.
        """
        if self.verbose:
            self.logger.debug("PDBQuery::run_queue")

        try:
            timed_out = False
            timer = 0
            if self.jobs != {}:
                jobs = self.jobs.keys()
                while len(jobs) != 0:
                    for job in jobs:
                        if job.is_alive() == False:
                            jobs.remove(job)
                            code = self.jobs.pop(job)
                            Utils.folders(self, "Phaser_%s" % code)
                            new_jobs = []
                            if self.test == False:
                                del self.pids[code]
                            #if self.verbose:
                            self.logger.debug("Finished Phaser on %s" % code)
                            p = self.postprocess_phaser(code)
                            if p.count("rigid"):
                                if os.path.exists("rigid.log") == False:
                                    j = Process(target=self.process_refine, args=(code, ))
                                    j.start()
                                    new_jobs.append(j)
                                    if self.test:
                                        time.sleep(5)
                            if p.count("ADF"):
                                if os.path.exists("adf.com") == False:
                                    j = Process(target=Utils.calcADF, args=(self, code))
                                    j.start()
                                    new_jobs.append(j)
                            if len(new_jobs) > 0:
                                for j1 in new_jobs:
                                    self.jobs[j1] = code
                                    jobs.append(j1)
                    time.sleep(0.2)
                    timer += 0.2
                    if self.phaser_timer:
                        if timer >= self.phaser_timer:
                            timed_out = True
                            break
                if timed_out:
                    for j in self.jobs.values():
                        if self.pids.has_key(j):
                            if self.cluster_use:
                                # TODO
                                # BLspec.killChildrenCluster(self,self.pids[j])
                                pass
                            else:
                                Utils.killChildren(self, self.pids[j])
                        if self.phaser_results.has_key(j) == False:
                            self.phaser_results[j] = {"AutoMR results": Parse.setPhaserFailed("Timed out")}
                    if self.verbose:
                        self.logger.debug("PDBQuery timed out.")
                        print "PDBQuery timed out."
            if self.verbose:
                self.logger.debug("PDBQuery.run_queue finished.")

        except:
            self.logger.exception("**ERROR in PDBQuery.run_queue**")
Exemple #13
0
    def process_phaser(self):
        """
        Start Phaser for input pdb.
        """
        if self.verbose:
            self.logger.debug("PDBQuery::process_phaser")

        def launch_job(inp):
            queue = Queue()
            job = Process(target=RunPhaser, args=(inp, queue, self.logger))
            job.start()
            queue.get()  # For the log I don"t use
            self.jobs[job] = inp["name"]
            self.pids[inp["name"]] = queue.get()

        try:
            for code in self.cell_output.keys():
              #for code in ["4ER2"]:
                l = False
                copy = 1
                Utils.folders(self, "Phaser_%s" % code)
                f = os.path.basename(self.cell_output[code].get("path"))
                #Check if symlink exists and create if not.
                if os.path.exists(f) == False:
                    os.symlink(self.cell_output[code].get("path"), f)
                #If mmCIF, checks if file exists or if it is super structure with
                #multiple PDB codes, and returns False, otherwise sends back SG.
                sg_pdb = Utils.fixSG(self, Utils.getSGInfo(self, f))
                #Remove codes that won't run or PDB/mmCIF's that don't exist.
                if sg_pdb == False:
                    del self.cell_output[code]
                    continue
                #**Now check all SG's**
                lg_pdb = Utils.subGroups(self, Utils.convertSG(self, sg_pdb), "simple")
                #SG from data
                sg = Utils.convertSG(self, self.laue, True)
                #Fewer mols in AU or in self.common.
                if code in self.common or float(self.laue) > float(lg_pdb):
                    #if SM is lower sym, which will cause problems, since PDB is too big.
                    #Need full path for copying pdb files to folders.
                    pdb_info = Utils.getPDBInfo(self, os.path.join(os.getcwd(), f))
                    #Prune if only one chain present, b/c "all" and "A" will be the same.
                    if len(pdb_info.keys()) == 2:
                        for key in pdb_info.keys():
                            if key != "all":
                                del pdb_info[key]
                    copy = pdb_info["all"]["NMol"]
                    if copy == 0:
                        copy = 1
                    # If pdb_info["all"]["res"] == 0.0:
                    if pdb_info["all"]["SC"] < 0.2:
                        # Only run on chains that will fit in the AU.
                        l = [chain for chain in pdb_info.keys() if pdb_info[chain]["res"] != 0.0]
                #More mols in AU
                elif float(self.laue) < float(lg_pdb):
                    pdb_info = Utils.getPDBInfo(self, f, True, True)
                    copy = pdb_info["all"]["NMol"]
                #Same number of mols in AU.
                else:
                    pdb_info = Utils.getPDBInfo(self, f, False, True)

                d = {"data":self.datafile, "pdb":f, "name":code, "verbose":self.verbose, "sg":sg,
                     "copy":copy, "test":self.test, "cluster":self.cluster_use, "cell analysis":True,
                     "large":self.large_cell, "res":Utils.setPhaserRes(self, pdb_info["all"]["res"]),
                    }

                if l == False:
                    launch_job(d)
                else:
                    d1 = {}
                    for chain in l:
                        new_code = "%s_%s" % (code, chain)
                        Utils.folders(self, "Phaser_%s" % new_code)
                        d.update({"pdb":pdb_info[chain]["file"], "name":new_code, "copy":pdb_info[chain]["NMol"],
                                  "res":Utils.setPhaserRes(self, pdb_info[chain]["res"])})
                        launch_job(d)

        except:
            self.logger.exception("**ERROR in PDBQuery.process_phaser**")
Example #14
    def process_phaser(self):
        """Start Phaser for input pdb"""

        self.logger.debug("process_phaser")
        self.tprint("\nStarting molecular replacement", level=30, color="blue")

        # POOL = Pool(processes=4)
        #
        # def launch_job(inp):
        #     """Run a phaser process and retrieve results"""
        #
        #     print "launch_job", inp
        #
        #     queue = Queue()
        #     result = POOL.apply_async(phaser_func, (inp, queue, self.logger))
        #
        #     # queue = Queue()
        #     # job = Process(target=RunPhaser, args=(inp, queue, self.logger))
        #     # job.start()
        #     # # Get results
        #     # queue.get()  # For the log I don"t use
        #     # self.jobs[job] = inp["name"]
        #     # self.pids[inp["name"]] = queue.get()

        # Run through the pdbs
        self.tprint("  Assembling Phaser runs", level=10, color="white")
        commands = []
        for code in self.cell_output.keys():

            self.tprint("    %s" % code, level=30, color="white")

            l = False
            copy = 1

            # Create directory for MR
            xutils.create_folders(self.working_dir, "Phaser_%s" % code)

            # The cif file name
            cif_file = os.path.basename(self.cell_output[code].get("path"))
            # print "cif_file", cif_file
            gzip_file = cif_file + ".gz"
            # print "gzip_file", gzip_file
            cached_file = False

            # Is the cif file in the local cache?
            if self.cif_cache:
                cached_file = os.path.join(self.cif_cache, gzip_file)
                # print "cached_file", cached_file
                if os.path.exists(cached_file):
                    self.tprint("      Have cached cif file %s" % gzip_file,
                                level=10,
                                color="white")

                else:
                    # Get the gzipped cif file from the PDBQ server
                    self.tprint("      Fetching %s" % cif_file,
                                level=10,
                                color="white")
                    try:
                        response = urllib2.urlopen(urllib2.Request(\
                                   "%s/entry/get_cif/%s" % \
                                   (PDBQ_SERVER, cif_file.replace(".cif", "")))\
                                   , timeout=60).read()
                    except urllib2.HTTPError as http_error:
                        self.tprint("      %s when fetching %s" %
                                    (http_error, cif_file),
                                    level=50,
                                    color="red")
                        continue

                    # Write the  gzip file
                    with open(cached_file, "wb") as outfile:
                        outfile.write(response)

                # Copy the gzip file to the cwd
                # print "Copying %s to %s" % (cached_file, os.path.join(os.getcwd(), gzip_file))
                shutil.copy(cached_file, os.path.join(os.getcwd(), gzip_file))

            # No local CIF file cache
            else:
                # Get the gzipped cif file from the PDBQ server
                self.tprint("      Fetching %s" % cif_file,
                            level=10,
                            color="white")
                try:
                    response = urllib2.urlopen(urllib2.Request(\
                               "%s/entry/get_cif/%s" % \
                               (PDBQ_SERVER, cif_file.replace(".cif", ""))), \
                               timeout=60).read()
                except urllib2.HTTPError as http_error:
                    self.tprint("      %s when fetching %s" %
                                (http_error, cif_file),
                                level=50,
                                color="red")
                    continue

                # Write the  gzip file
                with open(gzip_file, "wb") as outfile:
                    outfile.write(response)

            # Uncompress the gzipped file
            unzip_proc = subprocess.Popen(["gunzip", gzip_file])
            unzip_proc.wait()

            # If mmCIF, checks if file exists or if it is super structure with
            # multiple PDB codes, and returns False, otherwise sends back SG.
            sg_pdb = xutils.fix_spacegroup(
                xutils.get_spacegroup_info(cif_file))

            # Remove codes that won't run or PDB/mmCIF's that don't exist.
            if sg_pdb == False:
                del self.cell_output[code]
                continue

            # Convert from cif to pdb
            conversion_proc = subprocess.Popen(["phenix.cif_as_pdb", cif_file],
                                               stdout=subprocess.PIPE,
                                               stderr=subprocess.PIPE)
            conversion_proc.wait()
            cif_file = cif_file.replace(".cif", ".pdb")

            # Now check all SG's
            sg_num = xutils.convert_spacegroup(sg_pdb)
            lg_pdb = xutils.get_sub_groups(sg_num, "simple")
            self.tprint("      %s spacegroup: %s (%s)" %
                        (cif_file, sg_pdb, sg_num),
                        level=10,
                        color="white")
            self.tprint("    subgroups: %s" % str(lg_pdb),
                        level=10,
                        color="white")

            # SG from data
            data_spacegroup = xutils.convert_spacegroup(self.laue, True)
            # self.tprint("      Data spacegroup: %s" % data_spacegroup, level=10, color="white")

            # Fewer mols in AU or in self.common.
            if code in self.common or float(self.laue) > float(lg_pdb):
                # if SM is lower sym, which will cause problems, since PDB is too big.
                # Need full path for copying pdb files to folders.
                pdb_info = xutils.get_pdb_info(os.path.join(
                    os.getcwd(), cif_file),
                                               dres=self.dres,
                                               matthews=True,
                                               cell_analysis=False,
                                               data_file=self.datafile)
                #Prune if only one chain present, b/c "all" and "A" will be the same.
                if len(pdb_info.keys()) == 2:
                    for key in pdb_info.keys():
                        if key != "all":
                            del pdb_info[key]
                copy = pdb_info["all"]["NMol"]
                if copy == 0:
                    copy = 1
                # If pdb_info["all"]["res"] == 0.0:
                if pdb_info["all"]["SC"] < 0.2:
                    # Only run on chains that will fit in the AU.
                    l = [
                        chain for chain in pdb_info.keys()
                        if pdb_info[chain]["res"] != 0.0
                    ]

            # More mols in AU
            elif float(self.laue) < float(lg_pdb):
                pdb_info = xutils.get_pdb_info(cif_file=cif_file,
                                               dres=self.dres,
                                               matthews=True,
                                               cell_analysis=True,
                                               data_file=self.datafile)
                copy = pdb_info["all"]["NMol"]

            # Same number of mols in AU.
            else:
                pdb_info = xutils.get_pdb_info(cif_file=cif_file,
                                               dres=self.dres,
                                               matthews=False,
                                               cell_analysis=True,
                                               data_file=self.datafile)

            job_description = {
                "work_dir": os.path.abspath(
                    os.path.join(self.working_dir, "Phaser_%s" % code)),
                "data": self.datafile,
                "pdb": cif_file,
                "name": code,
                "verbose": self.verbose,
                "sg": data_spacegroup,
                "copy": copy,
                "test": self.test,
                "cluster": self.cluster_use,
                "cell analysis": True,
                "large": self.large_cell,
                "res": xutils.set_phaser_res(pdb_info["all"]["res"],
                                             self.large_cell, self.dres),
                "timeout": self.phaser_timer
            }

            if not l:
                commands.append(job_description)
            else:
                # Use a fresh copy for each chain: updating and re-appending
                # the same dict would leave every queued entry holding the
                # last chain's values.
                for chain in l:
                    new_code = "%s_%s" % (code, chain)
                    xutils.folders(self, "Phaser_%s" % new_code)
                    chain_description = dict(job_description)
                    chain_description.update({
                        "work_dir": os.path.abspath(os.path.join(self.working_dir, "Phaser_%s" % \
                            new_code)),
                        "pdb": pdb_info[chain]["file"],
                        "name": new_code,
                        "copy": pdb_info[chain]["NMol"],
                        "res": xutils.set_phaser_res(pdb_info[chain]["res"],
                                                     self.large_cell,
                                                     self.dres)})

                    commands.append(chain_description)

        # pprint(commands)
        # phaser_results = []
        # for command in commands:
        #     phaser_results.append(phaser_func(command))

        # Run in pool
        pool = Pool(2)
        self.tprint("    Initiating Phaser runs", level=10, color="white")
        results = pool.map_async(phaser_func, commands)
        pool.close()
        pool.join()
        phaser_results = results.get()

        return phaser_results
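
Examples #6 and #14 both end by fanning the assembled commands out over a small worker pool and blocking until every run returns. The same pattern in isolation, with a trivial top-level function standing in for phaser_func:

from multiprocessing import Pool

def phaser_stub(command):
    """Stand-in for phaser_func: pretend to run MR and return a result."""
    return {'name': command['name'], 'solution': False}

if __name__ == '__main__':
    commands = [{'name': '1abc'}, {'name': '2xyz'}]
    pool = Pool(2)  # two concurrent runs, as in the examples
    results = pool.map_async(phaser_stub, commands)
    pool.close()    # no further work will be submitted
    pool.join()     # block until the workers finish
    phaser_results = results.get()  # results in the order of `commands`

Since pool.join() has already waited for the workers, the final results.get() returns immediately; map_async plus close/join is equivalent here to a plain pool.map, but it matches the shape used in the examples.
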