Python runCCA Exemples, phaser.runCCA Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : phaser_preflight.py Projet : bopopescu/RAPD

 def run_cca(run_mr, target_resolution, args):
     """Run Phaser's cell content analysis for the given data set.

     The space group and unit cell are read from ``run_mr``; the expected
     composition comes from ``args.np`` (protein residues) and ``args.na``
     (nucleic acid residues), each added only when greater than zero.

     Returns a ``(z, solvent_content)`` tuple, where ``z`` is the value of
     ``getBestZ()`` and ``solvent_content`` is ``1 - 1.23 / getBestVM()``.
     When the analysis does not succeed, ``(0, 0.0)`` is returned.
     """
     cca_input = phaser.InputCCA()
     cca_input.setSPAC_HALL(run_mr.getSpaceGroupHall())
     cca_input.setCELL6(run_mr.getUnitCell())
     cca_input.setMUTE(True)
     # The high resolution limit has to be set explicitly.
     cca_input.setRESO_HIGH(target_resolution)

     if args.np > 0:
         cca_input.addCOMP_PROT_NRES_NUM(args.np, 1)
     if args.na > 0:
         cca_input.addCOMP_NUCL_NRES_NUM(args.na, 1)

     cca_result = phaser.runCCA(cca_input)
     if not cca_result.Success():
         return (0, 0.0)

     best_z = cca_result.getBestZ()
     solvent_content = 1 - (1.23 / cca_result.getBestVM())
     return (best_z, solvent_content)

Exemple #2

0

Afficher le fichier

Fichier : rapd_phaser.py Projet : RAPD/RAPD

 def run_cca():
     """Run Phaser's cell content analysis using names from the enclosing scope.

     Reads ``r`` (source of space group and unit cell), ``res0`` (high
     resolution limit), ``np`` and ``na`` (protein / nucleic acid residue
     counts) from the surrounding scope.

     Returns ``(z, solvent_content)`` with ``z = getBestZ()`` and
     ``solvent_content = 1 - 1.23 / getBestVM()``, or ``(0, 0.0)`` when the
     analysis does not succeed.
     """
     cca_input = phaser.InputCCA()
     cca_input.setSPAC_HALL(r.getSpaceGroupHall())
     cca_input.setCELL6(r.getUnitCell())
     cca_input.setMUTE(True)
     # The high resolution limit has to be set explicitly.
     cca_input.setRESO_HIGH(res0)

     # Only declare components that are actually present.
     if np > 0:
         cca_input.addCOMP_PROT_NRES_NUM(np, 1)
     if na > 0:
         cca_input.addCOMP_NUCL_NRES_NUM(na, 1)

     outcome = phaser.runCCA(cca_input)
     if outcome.Success():
         return (outcome.getBestZ(), 1 - (1.23 / outcome.getBestVM()))
     return (0, 0.0)

Exemple #3

0

Afficher le fichier

Fichier : rapd_phaser.py Projet : bopopescu/RAPD

 def run_cca():
     """Estimate cell contents with Phaser from values in the enclosing scope.

     Uses the free names ``r`` (space group and unit cell provider), ``res0``
     (high resolution limit), ``np`` (protein residues) and ``na`` (nucleic
     acid residues).

     Returns a 2-tuple ``(z, solvent_content)``: the ``getBestZ()`` value and
     ``1 - 1.23 / getBestVM()``. Falls back to ``(0, 0.0)`` if the Phaser run
     does not report success.
     """
     best_z, solvent = 0, 0.0

     setup = phaser.InputCCA()
     setup.setSPAC_HALL(r.getSpaceGroupHall())
     setup.setCELL6(r.getUnitCell())
     setup.setMUTE(True)
     # The high resolution limit has to be set explicitly.
     setup.setRESO_HIGH(res0)
     if np > 0:
         setup.addCOMP_PROT_NRES_NUM(np, 1)
     if na > 0:
         setup.addCOMP_NUCL_NRES_NUM(na, 1)

     analysis = phaser.runCCA(setup)
     if analysis.Success():
         best_z = analysis.getBestZ()
         matthews_vm = analysis.getBestVM()
         solvent = 1 - (1.23 / matthews_vm)
     return (best_z, solvent)

Exemple #4

0

Afficher le fichier

Fichier : phaser_preflight.py Projet : RAPD/RAPD

 def run_cca(run_mr, target_resolution, args):
     """Estimate cell contents with Phaser for the data described by ``run_mr``.

     ``run_mr`` supplies the space group (Hall symbol) and unit cell,
     ``target_resolution`` is used as the high resolution limit, and
     ``args.np`` / ``args.na`` give the protein / nucleic acid residue counts
     (a component is only added when its count is positive).

     Returns ``(z, solvent_content)``: the ``getBestZ()`` value and
     ``1 - 1.23 / getBestVM()``; ``(0, 0.0)`` if Phaser does not report
     success.
     """
     best_z, solvent_content = 0, 0.0

     setup = phaser.InputCCA()
     setup.setSPAC_HALL(run_mr.getSpaceGroupHall())
     setup.setCELL6(run_mr.getUnitCell())
     setup.setMUTE(True)
     # The high resolution limit has to be set explicitly.
     setup.setRESO_HIGH(target_resolution)
     if args.np > 0:
         setup.addCOMP_PROT_NRES_NUM(args.np, 1)
     if args.na > 0:
         setup.addCOMP_NUCL_NRES_NUM(args.na, 1)

     analysis = phaser.runCCA(setup)
     if analysis.Success():
         best_z = analysis.getBestZ()
         matthews_vm = analysis.getBestVM()
         solvent_content = 1 - (1.23 / matthews_vm)
     return (best_z, solvent_content)

Exemple #5

0

Afficher le fichier

def calculate_solvent(root, data, seqin, ncs_copies, highres, logfile):
  """Run Phaser's cell content analysis and log the estimated solvent content.

  Parameters
  ----------
  root
      Not used by this function.
  data
      Object providing ``getSpaceGroupHall()``, ``getUnitCell()`` and
      ``logfile()``.
  seqin
      Sequence input passed to ``addCOMP_PROT_SEQ_NUM``.
  ncs_copies : int
      Number of copies of the sequence passed to ``addCOMP_PROT_SEQ_NUM``.
  highres
      High resolution limit passed to ``setRESO`` (low limit is 10000).
  logfile : str
      Path of the log file to write.

  Returns
  -------
  tuple
      ``(getBestZ(), getBestVM(), getZ()[0], getVM()[0])`` from the CCA result.
  """
  # Local name avoids shadowing the ``input`` builtin.
  cca_input = phaser.InputCCA()
  cca_input.setSPAC_HALL(data.getSpaceGroupHall())
  cca_input.setCELL6(data.getUnitCell())
  cca_input.addCOMP_PROT_SEQ_NUM(seqin, ncs_copies)
  cca_input.setRESO(highres, 10000)
  cca_input.setMUTE(True)
  cca = phaser.runCCA(cca_input)

  # NOTE(review): this writes data.logfile(), not cca.logfile(), into the file
  # opened as the CCA log -- confirm that this is intended.
  with open(logfile, 'w') as cca_log:
    print(data.logfile(), file=cca_log)

  best_z = cca.getBestZ()
  # Solvent content for the composition that was supplied, computed once
  # instead of being repeated in every logging branch.
  seq_vm = cca.getVM()[0]
  seq_solvent = 1.0 - 1.232/seq_vm

  # check if solvent content is higher than average
  if best_z > 1:
    log.warning ('*** Warning solvent content estimated by Phaser is %0.2f' % (1.0 - 1.232/cca.getBestVM()))
    if ncs_copies > 1:
      log.warning('    solvent content calculated from %d copies of sequence is %0.2f\n' % (ncs_copies, seq_solvent))
    else:
      log.warning('    solvent content calculated from 1 copy of sequence is %0.2f\n' % seq_solvent)
  elif ncs_copies > 1:
    log.info('Solvent content calculated from %d copies of sequence is %0.2f\n' % (ncs_copies, seq_solvent))
  else:
    log.info('Solvent content calculated from 1 copy of sequence is %0.2f\n' % seq_solvent)

  return best_z, cca.getBestVM(), cca.getZ()[0], seq_vm

Exemple #6

0

Afficher le fichier

    def run(self,
            models_dir,
            nproc=2,
            shres=3.0,
            pklim=0.5,
            npic=50,
            rotastep=1.0,
            min_solvent_content=20,
            submit_nproc=None,
            submit_qtype=None,
            submit_queue=None,
            monitor=None,
            chunk_size=0,
            **kwargs):
        """Run amore rotation function on a directory of models

        Parameters
        ----------
        models_dir : str
            The directory containing the models to run the rotation search on
        nproc : int, optional
            The number of processors to run the job on; also the size of the
            script-generation pool when ``submit_qtype`` is ``'local'`` [default: 2]
        shres : int, float, optional
            Spherical harmonic resolution [default 3.0]
        pklim : int, float, optional
            Peak limit, output all peaks above <float> [default: 0.5]
        npic : int, optional
            Number of peaks to output from the translation function map for each orientation [default: 50]
        rotastep : int, float, optional
            Size of rotation step [default : 1.0]
        min_solvent_content : int, float, optional
            The minimum solvent content present in the unit cell with the input model;
            models predicted to fall below it are skipped [default: 20]
        submit_nproc : int, optional
            The size of the script-generation pool when ``submit_qtype`` is
            not ``'local'`` [default: None]
        submit_qtype : str
            The cluster submission queue type - currently support SGE and LSF
        submit_queue : str
            The queue to submit to on the cluster
        monitor
            Passed through unchanged to ``simbad.util.submit_chunk``
        chunk_size : int, optional
            The number of jobs to submit at the same time
        **kwargs
            Accepted for call compatibility; not used here

        Returns
        -------
        None
            The rotation scores are stored in ``self._search_results``

        """
        self.shres = shres
        self.pklim = pklim
        self.npic = npic
        self.rotastep = rotastep

        self.submit_qtype = submit_qtype
        self.submit_queue = submit_queue

        self.simbad_dat_files = simbad.db.find_simbad_dat_files(models_dir)

        mtz_labels = simbad.util.mtz_util.GetLabels(self.mtz)

        mr_input = InputMR_DAT()
        mr_input.setHKLI(self.mtz)
        mr_input.setLABI_F_SIGF(mtz_labels.f, mtz_labels.sigf)
        mr_input.setMUTE(True)
        run_mr_data = runMR_DAT(mr_input)

        sg = run_mr_data.getSpaceGroupName().replace(" ", "")
        cell = " ".join(map(str, run_mr_data.getUnitCell()))

        sol_calc = simbad.util.matthews_prob.SolventContent(cell, sg)

        dir_name = "simbad-tmp-" + str(uuid.uuid1())
        self.script_log_dir = os.path.join(self.work_dir, dir_name)
        os.mkdir(self.script_log_dir)

        self.hklpck0 = self._generate_hklpck0()

        self.ccp4_scr = os.environ["CCP4_SCR"]
        default_tmp_dir = os.path.join(self.work_dir, 'tmp')
        if self.tmp_dir:
            self.template_tmp_dir = os.path.join(self.tmp_dir,
                                                 dir_name + "-{0}")
        else:
            self.template_tmp_dir = os.path.join(default_tmp_dir,
                                                 dir_name + "-{0}")

        # Molecular weight predicted by Phaser's cell content analysis; stays
        # at 0 when either Phaser step does not report success.
        predicted_molecular_weight = 0
        if run_mr_data.Success():
            cca_input = InputCCA()
            cca_input.setSPAC_HALL(run_mr_data.getSpaceGroupHall())
            cca_input.setCELL6(run_mr_data.getUnitCell())
            cca_input.setMUTE(True)
            run_cca = runCCA(cca_input)

            if run_cca.Success():
                predicted_molecular_weight = run_cca.getAssemblyMW()

        dat_models = []
        for dat_model in self.simbad_dat_files:
            name = os.path.basename(dat_model.replace(".dat", ""))
            pdb_struct = simbad.util.pdb_util.PdbStructure()
            pdb_struct.from_file(dat_model)
            try:
                solvent_content = sol_calc.calculate_from_struct(pdb_struct)
                if solvent_content < min_solvent_content:
                    msg = "Skipping %s: solvent content is predicted to be less than %.2f"
                    logger.debug(msg, name, min_solvent_content)
                    continue
            except ValueError:
                msg = "Skipping %s: Error calculating solvent content"
                logger.debug(msg, name)
                continue
            except IndexError:
                msg = "Skipping %s: Problem with dat file"
                logger.debug(msg, name)
                continue

            x, y, z, intrad = pdb_struct.integration_box
            model_molecular_weight = pdb_struct.molecular_weight
            mw_diff = abs(predicted_molecular_weight - model_molecular_weight)

            info = simbad.core.dat_score.DatModelScore(name, dat_model,
                                                       mw_diff, x, y, z,
                                                       intrad, solvent_content,
                                                       None)
            dat_models.append(info)

        # Models closest to the predicted molecular weight are trialled first.
        sorted_dat_models = sorted(dat_models,
                                   key=lambda x: float(x.mw_diff),
                                   reverse=False)
        n_files = len(sorted_dat_models)
        chunk_size = simbad.rotsearch.get_chunk_size(n_files, chunk_size)
        total_chunk_cycles = simbad.rotsearch.get_total_chunk_cycles(
            n_files, chunk_size)

        if submit_qtype == 'local':
            processes = nproc
        else:
            processes = submit_nproc

        results = []
        for cycle, start in enumerate(range(0, n_files, chunk_size)):
            logger.info("Working on chunk %d out of %d", cycle + 1,
                        total_chunk_cycles)

            if self.solution:
                logger.info(
                    "Early termination criteria met, skipping chunk %d",
                    cycle + 1)
                continue

            collector = ScriptCollector(None)
            amore_files = []
            chunk = sorted_dat_models[start:start + chunk_size]
            # Calling ``self`` on a model yields either None or a pair whose
            # first item goes to the collector and second to amore_files.
            with pool.Pool(processes=processes) as p:
                for job in p.map(self, chunk):
                    if job is None:
                        continue
                    collector.add(job[0])
                    amore_files.append(job[1])

            if len(collector.scripts) > 0:
                logger.info("Running AMORE tab/rot functions")
                amore_logs, chunk_dat_models = zip(*amore_files)
                simbad.util.submit_chunk(collector, self.script_log_dir, nproc,
                                         'simbad_amore', submit_qtype,
                                         submit_queue, True, monitor,
                                         self.rot_succeeded_log)

                for dat_model, amore_log in zip(chunk_dat_models, amore_logs):
                    base = os.path.basename(amore_log)
                    pdb_code = base.replace("amore_", "").replace(".log", "")
                    try:
                        rotsearch_parser = simbad.parsers.rotsearch_parser.AmoreRotsearchParser(
                            amore_log)
                        score = simbad.core.amore_score.AmoreRotationScore(
                            pdb_code, dat_model, rotsearch_parser.alpha,
                            rotsearch_parser.beta, rotsearch_parser.gamma,
                            rotsearch_parser.cc_f, rotsearch_parser.rf_f,
                            rotsearch_parser.cc_i, rotsearch_parser.cc_p,
                            rotsearch_parser.icp,
                            rotsearch_parser.cc_f_z_score,
                            rotsearch_parser.cc_p_z_score,
                            rotsearch_parser.num_of_rot)
                        if rotsearch_parser.cc_f_z_score:
                            results.append(score)
                    except IOError:
                        # Best effort: an unreadable log only drops this model.
                        logger.debug("Unable to read AMORE log %s, skipping",
                                     amore_log)

            else:
                logger.critical("No structures to be trialled")

        self._search_results = results
        shutil.rmtree(self.script_log_dir)

        if os.path.isdir(default_tmp_dir):
            shutil.rmtree(default_tmp_dir)

Exemple #7

0

Afficher le fichier

Fichier : phaser_search.py Projet : fsimkovic/SIMBAD

    def run(self,
            models_dir,
            nproc=2,
            min_solvent_content=20,
            submit_qtype=None,
            submit_queue=None,
            monitor=None,
            chunk_size=0,
            **kwargs):
        """Run phaser rotation function on a directory of models

        Parameters
        ----------
        models_dir : str
            The directory containing the models to run the rotation search on
        nproc : int, optional
            The number of processors to run the job on [default: 2]
        min_solvent_content : int, float, optional
            The minimum solvent content (in percent) present in the unit cell with
            the input model; models predicted to fall below it are skipped [default: 20]
        submit_qtype : str
            The cluster submission queue type - currently support SGE and LSF
        submit_queue : str
            The queue to submit to on the cluster
        monitor
            Passed through unchanged to ``simbad.rotsearch.submit_chunk``
        chunk_size : int, optional
            The number of jobs to submit at the same time
        **kwargs
            Accepted for call compatibility; not used here

        Returns
        -------
        None
            The rotation scores of all chunks are stored in ``self._search_results``
        """
        self.submit_qtype = submit_qtype
        self.submit_queue = submit_queue
        self.f, self.sigf, self.i, self.sigi, _, _, _ = simbad.util.mtz_util.get_labels(self.mtz)

        self.simbad_dat_files = simbad.db.find_simbad_dat_files(models_dir)
        n_files = len(self.simbad_dat_files)

        mr_input = InputMR_DAT()
        mr_input.setHKLI(self.mtz)
        mr_input.setMUTE(True)
        run_mr_data = runMR_DAT(mr_input)

        sg = run_mr_data.getSpaceGroupName().replace(" ", "")
        cell = " ".join(map(str, run_mr_data.getUnitCell()))

        chunk_size = simbad.rotsearch.get_chunk_size(n_files, chunk_size)
        total_chunk_cycles = simbad.rotsearch.get_total_chunk_cycles(n_files, chunk_size)

        mat_coef = simbad.util.matthews_prob.MatthewsProbability(cell, sg)

        dir_name = "simbad-tmp-" + str(uuid.uuid1())
        script_log_dir = os.path.join(self.work_dir, dir_name)
        os.mkdir(script_log_dir)

        ccp4_scr = os.environ["CCP4_SCR"]
        default_tmp_dir = os.path.join(self.work_dir, 'tmp')
        if self.tmp_dir:
            template_tmp_dir = os.path.join(self.tmp_dir, dir_name + "-{0}")
        else:
            template_tmp_dir = os.path.join(default_tmp_dir, dir_name + "-{0}")

        # Molecular weight predicted by Phaser's cell content analysis; stays
        # at 0 when either Phaser step does not report success.
        predicted_molecular_weight = 0
        if run_mr_data.Success():
            cca_input = InputCCA()
            cca_input.setSPAC_HALL(run_mr_data.getSpaceGroupHall())
            cca_input.setCELL6(run_mr_data.getUnitCell())
            cca_input.setMUTE(True)
            run_cca = runCCA(cca_input)

            if run_cca.Success():
                predicted_molecular_weight = run_cca.getAssemblyMW()

        dat_models = []
        for dat_model in self.simbad_dat_files:
            name = os.path.basename(dat_model.replace(".dat", ""))
            pdb_struct = simbad.util.pdb_util.PdbStructure()
            pdb_struct.from_file(dat_model)
            solvent_fraction, n_copies = mat_coef.calculate_content_ncopies_from_struct(pdb_struct)
            solvent_content = solvent_fraction * 100
            if solvent_content < min_solvent_content:
                msg = "Skipping %s: solvent content is predicted to be less than %.2f"
                logger.debug(msg, name, min_solvent_content)
                continue
            mw_diff = abs(predicted_molecular_weight - pdb_struct.molecular_weight)

            info = simbad.core.dat_score.DatModelScore(name, dat_model, mw_diff, None, None, None, None,
                                                       solvent_fraction, n_copies)
            dat_models.append(info)

        # Models closest to the predicted molecular weight are trialled first.
        sorted_dat_models = sorted(dat_models, key=lambda x: float(x.mw_diff), reverse=False)

        template_model = os.path.join("$CCP4_SCR", "{0}.pdb")
        template_rot_log = os.path.join("$CCP4_SCR", "{0}_rot.log")

        # Accumulated across ALL chunks: initialising this inside the loop
        # would discard the scores of every chunk but the last.
        results = []
        for cycle, start in enumerate(range(0, n_files, chunk_size)):
            logger.info("Working on chunk %d out of %d", cycle + 1, total_chunk_cycles)

            phaser_files = []
            for dat_model in sorted_dat_models[start:start + chunk_size]:
                logger.debug("Generating script to perform PHASER rotation function on %s", dat_model.pdb_code)

                pdb_model = template_model.format(dat_model.pdb_code)

                conv_py = "\"from simbad.db import convert_dat_to_pdb; convert_dat_to_pdb('{}', '{}')\""
                conv_py = conv_py.format(dat_model.dat_path, pdb_model)

                rot_log = template_rot_log.format(dat_model.pdb_code)
                tmp_dir = template_tmp_dir.format(dat_model.pdb_code)

                phaser_cmd = [
                    "simbad.rotsearch.phaser_rotation_search",
                    "-hklin",
                    self.mtz,
                    "-f",
                    self.f,
                    "-sigf",
                    self.sigf,
                    "-i",
                    self.i,
                    "-sigi",
                    self.sigi,
                    "-pdbin",
                    pdb_model,
                    "-logfile",
                    rot_log,
                    "-solvent",
                    dat_model.solvent,
                    "-nmol",
                    dat_model.nmol,
                    "-work_dir",
                    tmp_dir,
                ]
                phaser_cmd = " ".join(str(e) for e in phaser_cmd)

                cmd = [
                    [EXPORT, "CCP4_SCR=" + tmp_dir],
                    ["mkdir", "-p", "$CCP4_SCR\n"],
                    [CMD_PREFIX, "$CCP4/bin/ccp4-python", "-c", conv_py, os.linesep],
                    [CMD_PREFIX, "$CCP4/bin/ccp4-python", "-m", phaser_cmd, os.linesep],
                    ["rm", "-rf", "$CCP4_SCR\n"],
                    [EXPORT, "CCP4_SCR=" + ccp4_scr],
                ]
                phaser_script = pyjob.misc.make_script(
                    cmd, directory=script_log_dir, prefix="phaser_", stem=dat_model.pdb_code)
                phaser_log = phaser_script.rsplit(".", 1)[0] + '.log'
                phaser_files.append((phaser_script, phaser_log, dat_model.dat_path))

            if len(phaser_files) > 0:
                logger.info("Running PHASER rotation functions")
                phaser_scripts, phaser_logs, dat_paths = zip(*phaser_files)
                simbad.rotsearch.submit_chunk(phaser_scripts, script_log_dir, nproc, 'simbad_phaser', submit_qtype,
                                              submit_queue, monitor, self.rot_succeeded_log)

                for dat_path, phaser_log in zip(dat_paths, phaser_logs):
                    base = os.path.basename(phaser_log)
                    pdb_code = base.replace("phaser_", "").replace(".log", "")
                    try:
                        phaser_rotation_parser = simbad.parsers.rotsearch_parser.PhaserRotsearchParser(phaser_log)
                        if phaser_rotation_parser.rfact:
                            phaser_rotation_parser.llg = 100
                            phaser_rotation_parser.rfz = 10
                        score = simbad.core.phaser_score.PhaserRotationScore(
                            pdb_code, dat_path, phaser_rotation_parser.llg, phaser_rotation_parser.rfz)

                        if phaser_rotation_parser.rfz:
                            results.append(score)
                    except IOError:
                        # Best effort: an unreadable log only drops this model.
                        logger.debug("Unable to read PHASER log %s, skipping", phaser_log)

            else:
                logger.critical("No structures to be trialled")

        # Clean-up happens once, after the last chunk: removing the script
        # directory inside the loop would break every subsequent chunk.
        self._search_results = results
        shutil.rmtree(script_log_dir)

        if os.path.isdir(default_tmp_dir):
            shutil.rmtree(default_tmp_dir)

Exemple #8

0

Afficher le fichier

Fichier : phaser_search.py Projet : hlasimpk/SIMBAD

    def run(self,
            models_dir,
            nproc=2,
            min_solvent_content=20,
            submit_nproc=None,
            submit_qtype=None,
            submit_queue=None,
            monitor=None,
            chunk_size=0,
            **kwargs):
        """Run phaser rotation function on a directory of models

        Parameters
        ----------
        models_dir : str
            The directory containing the models to run the rotation search on
        nproc : int, optional
            The number of processors to run the job on; also the size of the
            script-generation pool when ``submit_qtype`` is ``'local'`` [default: 2]
        min_solvent_content : int, float, optional
            The minimum solvent content (in percent) present in the unit cell with
            the input model; models predicted to fall below it are skipped [default: 20]
        submit_nproc : int, optional
            The size of the script-generation pool when ``submit_qtype`` is
            not ``'local'`` [default: None]
        submit_qtype : str
            The cluster submission queue type - currently support SGE and LSF
        submit_queue : str
            The queue to submit to on the cluster
        monitor
            Passed through unchanged to ``simbad.util.submit_chunk``
        chunk_size : int, optional
            The number of jobs to submit at the same time
        **kwargs
            Accepted for call compatibility; not used here

        Returns
        -------
        None
            The rotation scores are stored in ``self._search_results``
        """
        self.submit_qtype = submit_qtype
        self.submit_queue = submit_queue
        self.mtz_labels = simbad.util.mtz_util.GetLabels(self.mtz)

        self.simbad_dat_files = simbad.db.find_simbad_dat_files(models_dir)

        mr_input = InputMR_DAT()
        mr_input.setHKLI(self.mtz)
        mr_input.setLABI_F_SIGF(self.mtz_labels.f, self.mtz_labels.sigf)
        mr_input.setMUTE(True)
        run_mr_data = runMR_DAT(mr_input)

        sg = run_mr_data.getSpaceGroupName().replace(" ", "")
        cell = " ".join(map(str, run_mr_data.getUnitCell()))

        mat_coef = simbad.util.matthews_prob.MatthewsProbability(cell, sg)

        dir_name = "simbad-tmp-" + str(uuid.uuid1())
        self.script_log_dir = os.path.join(self.work_dir, dir_name)
        os.mkdir(self.script_log_dir)

        self.ccp4_scr = os.environ["CCP4_SCR"]
        default_tmp_dir = os.path.join(self.work_dir, 'tmp')
        if self.tmp_dir:
            self.template_tmp_dir = os.path.join(self.tmp_dir,
                                                 dir_name + "-{0}")
        else:
            self.template_tmp_dir = os.path.join(default_tmp_dir,
                                                 dir_name + "-{0}")

        # Molecular weight predicted by Phaser's cell content analysis; stays
        # at 0 when either Phaser step does not report success.
        predicted_molecular_weight = 0
        if run_mr_data.Success():
            cca_input = InputCCA()
            cca_input.setSPAC_HALL(run_mr_data.getSpaceGroupHall())
            cca_input.setCELL6(run_mr_data.getUnitCell())
            cca_input.setMUTE(True)
            run_cca = runCCA(cca_input)

            if run_cca.Success():
                predicted_molecular_weight = run_cca.getAssemblyMW()

        dat_models = []
        for dat_model in self.simbad_dat_files:
            name = os.path.basename(dat_model.replace(".dat", ""))
            pdb_struct = simbad.util.pdb_util.PdbStructure()
            pdb_struct.from_file(dat_model)
            solvent_fraction, n_copies = mat_coef.calculate_content_ncopies_from_struct(
                pdb_struct)
            solvent_content = solvent_fraction * 100
            if solvent_content < min_solvent_content:
                msg = "Skipping %s: solvent content is predicted to be less than %.2f"
                logger.debug(msg, name, min_solvent_content)
                continue
            mw_diff = abs(predicted_molecular_weight -
                          pdb_struct.molecular_weight)

            info = simbad.core.dat_score.DatModelScore(name, dat_model,
                                                       mw_diff, None, None,
                                                       None, None,
                                                       solvent_fraction,
                                                       n_copies)
            dat_models.append(info)

        # Models closest to the predicted molecular weight are trialled first.
        sorted_dat_models = sorted(dat_models,
                                   key=lambda x: float(x.mw_diff),
                                   reverse=False)
        n_files = len(sorted_dat_models)
        chunk_size = simbad.rotsearch.get_chunk_size(n_files, chunk_size)
        total_chunk_cycles = simbad.rotsearch.get_total_chunk_cycles(
            n_files, chunk_size)

        # Pool size does not change between chunks, so decide it once.
        if submit_qtype == 'local':
            processes = nproc
        else:
            processes = submit_nproc

        results = []
        for cycle, start in enumerate(range(0, n_files, chunk_size)):
            logger.info("Working on chunk %d out of %d", cycle + 1,
                        total_chunk_cycles)

            if self.solution:
                logger.info(
                    "Early termination criteria met, skipping chunk %d",
                    cycle + 1)
                continue

            self.template_model = os.path.join("$CCP4_SCR", "{0}.pdb")

            collector = ScriptCollector(None)
            phaser_files = []
            chunk = sorted_dat_models[start:start + chunk_size]
            # Calling ``self`` on a model yields either None or a pair whose
            # first item goes to the collector and second to phaser_files.
            with pool.Pool(processes=processes) as p:
                for job in p.map(self, chunk):
                    if job is None:
                        continue
                    collector.add(job[0])
                    phaser_files.append(job[1])

            if len(phaser_files) > 0:
                logger.info("Running PHASER rotation functions")
                phaser_logs, chunk_dat_models = zip(*phaser_files)
                simbad.util.submit_chunk(collector, self.script_log_dir, nproc,
                                         'simbad_phaser', submit_qtype,
                                         submit_queue, True, monitor,
                                         self.rot_succeeded_log)

                for dat_model, phaser_log in zip(chunk_dat_models, phaser_logs):
                    base = os.path.basename(phaser_log)
                    pdb_code = base.replace("phaser_", "").replace(".log", "")
                    try:
                        phaser_rotation_parser = simbad.parsers.rotsearch_parser.PhaserRotsearchParser(
                            phaser_log)
                        if phaser_rotation_parser.rfact:
                            phaser_rotation_parser.llg = 100
                            phaser_rotation_parser.rfz = 10
                        score = simbad.core.phaser_score.PhaserRotationScore(
                            pdb_code, dat_model, phaser_rotation_parser.llg,
                            phaser_rotation_parser.rfz)

                        if phaser_rotation_parser.rfz:
                            results.append(score)
                    except IOError:
                        # Best effort: an unreadable log only drops this model.
                        logger.debug("Unable to read PHASER log %s, skipping",
                                     phaser_log)

            else:
                logger.critical("No structures to be trialled")

        self._search_results = results
        shutil.rmtree(self.script_log_dir)

        if os.path.isdir(default_tmp_dir):
            shutil.rmtree(default_tmp_dir)

Exemple #9

0

Afficher le fichier

Fichier : amore_search.py Projet : fsimkovic/SIMBAD

    def run(self,
            models_dir,
            nproc=2,
            shres=3.0,
            pklim=0.5,
            npic=50,
            rotastep=1.0,
            min_solvent_content=20,
            submit_qtype=None,
            submit_queue=None,
            monitor=None,
            chunk_size=0,
            **kwargs):
        """Run amore rotation function on a directory of models

        Parameters
        ----------
        models_dir : str
            The directory containing the models to run the rotation search on
        nproc : int, optional
            The number of processors to run the job on
        shres : int, float, optional
            Spherical harmonic resolution [default 3.0]
        pklim : int, float, optional
            Peak limit, output all peaks above <float> [default: 0.5]
        npic : int, optional
            Number of peaks to output from the translation function map for each orientation [default: 50]
        rotastep : int, float, optional
            Size of rotation step [default : 1.0]
        min_solvent_content : int, float, optional
            The minimum solvent content present in the unit cell with the input model [default: 30]
        submit_qtype : str
            The cluster submission queue type - currently support SGE and LSF
        submit_queue : str
            The queue to submit to on the cluster
        monitor
        chunk_size : int, optional
            The number of jobs to submit at the same time

        Returns
        -------
        file
            log file for each model in the models_dir

        """
        self.submit_qtype = submit_qtype
        self.submit_queue = submit_queue

        self.simbad_dat_files = simbad.db.find_simbad_dat_files(models_dir)
        n_files = len(self.simbad_dat_files)

        i = InputMR_DAT()
        i.setHKLI(self.mtz)
        i.setMUTE(True)
        run_mr_data = runMR_DAT(i)

        sg = run_mr_data.getSpaceGroupName().replace(" ", "")
        cell = " ".join(map(str, run_mr_data.getUnitCell()))

        chunk_size = simbad.rotsearch.get_chunk_size(n_files, chunk_size)
        total_chunk_cycles = simbad.rotsearch.get_total_chunk_cycles(n_files, chunk_size)

        sol_calc = simbad.util.matthews_prob.SolventContent(cell, sg)

        dir_name = "simbad-tmp-" + str(uuid.uuid1())
        script_log_dir = os.path.join(self.work_dir, dir_name)
        os.mkdir(script_log_dir)

        hklpck0 = self._generate_hklpck0()

        ccp4_scr = os.environ["CCP4_SCR"]
        default_tmp_dir = os.path.join(self.work_dir, 'tmp')
        if self.tmp_dir:
            template_tmp_dir = os.path.join(self.tmp_dir, dir_name + "-{0}")
        else:
            template_tmp_dir = os.path.join(default_tmp_dir, dir_name + "-{0}")

        template_hklpck1 = os.path.join("$CCP4_SCR", "{0}.hkl")
        template_clmn0 = os.path.join("$CCP4_SCR", "{0}_spmipch.clmn")
        template_clmn1 = os.path.join("$CCP4_SCR", "{0}.clmn")
        template_mapout = os.path.join("$CCP4_SCR", "{0}_amore_cross.map")
        template_table1 = os.path.join("$CCP4_SCR", "{0}_sfs.tab")
        template_model = os.path.join("$CCP4_SCR", "{0}.pdb")
        template_rot_log = os.path.join("$CCP4_SCR", "{0}_rot.log")

        predicted_molecular_weight = 0
        if run_mr_data.Success():
            i = InputCCA()
            i.setSPAC_HALL(run_mr_data.getSpaceGroupHall())
            i.setCELL6(run_mr_data.getUnitCell())
            i.setMUTE(True)
            run_cca = runCCA(i)

            if run_cca.Success():
                predicted_molecular_weight = run_cca.getAssemblyMW()

        dat_models = []
        for dat_model in self.simbad_dat_files:
            name = os.path.basename(dat_model.replace(".dat", ""))
            pdb_struct = simbad.util.pdb_util.PdbStructure()
            pdb_struct.from_file(dat_model)
            try:
                solvent_content = sol_calc.calculate_from_struct(pdb_struct)
                if solvent_content < min_solvent_content:
                    msg = "Skipping %s: solvent content is predicted to be less than %.2f"
                    logger.debug(msg, name, min_solvent_content)
                    continue
            except ValueError:
                msg = "Skipping %s: Error calculating solvent content"
                logger.debug(msg, name)

            x, y, z, intrad = pdb_struct.integration_box
            model_molecular_weight = pdb_struct.molecular_weight
            mw_diff = abs(predicted_molecular_weight - model_molecular_weight)

            info = simbad.core.dat_score.DatModelScore(name, dat_model, mw_diff, x, y, z, intrad, solvent_content, None)
            dat_models.append(info)

        sorted_dat_models = sorted(dat_models, key=lambda x: float(x.mw_diff), reverse=False)

        iteration_range = range(0, n_files, chunk_size)
        for cycle, i in enumerate(iteration_range):
            logger.info("Working on chunk %d out of %d", cycle + 1, total_chunk_cycles)

            amore_files = []
            for dat_model in sorted_dat_models[i:i + chunk_size]:
                logger.debug("Generating script to perform AMORE rotation " + "function on %s", dat_model.pdb_code)

                pdb_model = template_model.format(dat_model.pdb_code)
                table1 = template_table1.format(dat_model.pdb_code)
                hklpck1 = template_hklpck1.format(dat_model.pdb_code)
                clmn0 = template_clmn0.format(dat_model.pdb_code)
                clmn1 = template_clmn1.format(dat_model.pdb_code)
                mapout = template_mapout.format(dat_model.pdb_code)

                conv_py = "\"from simbad.db import convert_dat_to_pdb; convert_dat_to_pdb('{}', '{}')\""
                conv_py = conv_py.format(dat_model.dat_path, pdb_model)

                tab_cmd = [self.amore_exe, "xyzin1", pdb_model, "xyzout1", pdb_model, "table1", table1]
                tab_stdin = self.tabfun_stdin_template.format(
                    x=dat_model.x, y=dat_model.y, z=dat_model.z, a=90, b=90, c=120)

                rot_cmd = [
                    self.amore_exe, 'table1', table1, 'HKLPCK1', hklpck1, 'hklpck0', hklpck0, 'clmn1', clmn1, 'clmn0',
                    clmn0, 'MAPOUT', mapout
                ]
                rot_stdin = self.rotfun_stdin_template.format(
                    shres=shres, intrad=dat_model.intrad, pklim=pklim, npic=npic, step=rotastep)
                rot_log = template_rot_log.format(dat_model.pdb_code)

                tmp_dir = template_tmp_dir.format(dat_model.pdb_code)
                cmd = [
                    [EXPORT, "CCP4_SCR=" + tmp_dir],
                    ["mkdir", "-p", "$CCP4_SCR\n"],
                    [CMD_PREFIX, "$CCP4/bin/ccp4-python", "-c", conv_py, os.linesep],
                    tab_cmd + ["<< eof >", os.devnull],
                    [tab_stdin],
                    ["eof"],
                    [os.linesep],
                    rot_cmd + ["<< eof >", rot_log],
                    [rot_stdin],
                    ["eof"],
                    [os.linesep],
                    ["grep", "-m 1", "SOLUTIONRCD", rot_log, os.linesep],
                    ["rm", "-rf", "$CCP4_SCR\n"],
                    [EXPORT, "CCP4_SCR=" + ccp4_scr],
                ]
                amore_script = pyjob.misc.make_script(
                    cmd, directory=script_log_dir, prefix="amore_", stem=dat_model.pdb_code)
                amore_log = amore_script.rsplit(".", 1)[0] + '.log'
                amore_files += [(amore_script, tab_stdin, rot_stdin, amore_log, dat_model.dat_path)]

            results = []
            if len(amore_files) > 0:
                logger.info("Running AMORE tab/rot functions")
                amore_scripts, _, _, amore_logs, dat_models = zip(*amore_files)
                simbad.rotsearch.submit_chunk(amore_scripts, script_log_dir, nproc, 'simbad_amore', submit_qtype,
                                              submit_queue, monitor, self.rot_succeeded_log)

                for dat_model, amore_log in zip(dat_models, amore_logs):
                    base = os.path.basename(amore_log)
                    pdb_code = base.replace("amore_", "").replace(".log", "")
                    try:
                        rotsearch_parser = simbad.parsers.rotsearch_parser.AmoreRotsearchParser(amore_log)
                        score = simbad.core.amore_score.AmoreRotationScore(
                            pdb_code, dat_model, rotsearch_parser.alpha, rotsearch_parser.beta, rotsearch_parser.gamma,
                            rotsearch_parser.cc_f, rotsearch_parser.rf_f, rotsearch_parser.cc_i, rotsearch_parser.cc_p,
                            rotsearch_parser.icp, rotsearch_parser.cc_f_z_score, rotsearch_parser.cc_p_z_score,
                            rotsearch_parser.num_of_rot)
                        if rotsearch_parser.cc_f_z_score:
                            results += [score]
                    except IOError:
                        pass

            else:
                logger.critical("No structures to be trialled")

            self._search_results = results
            shutil.rmtree(script_log_dir)

            if os.path.isdir(default_tmp_dir):
                shutil.rmtree(default_tmp_dir)