Ejemplo n.º 1
0
 def test_tmp_file_9(self):
     # A kept file must honour directory, prefix and suffix at the same time.
     created = tmp_file(delete=False, directory=os.getcwd(), prefix="first", suffix="last")
     self.assertTrue(os.path.isfile(created))
     parent = os.path.dirname(created)
     filename = os.path.basename(created)
     self.assertEqual(os.getcwd(), parent)
     self.assertTrue(filename.startswith("first"))
     self.assertTrue(filename.endswith("last"))
     os.remove(created)
Ejemplo n.º 2
0
 def test_tmp_file_4(self):
     # The requested prefix must lead the generated file name.
     created = tmp_file(prefix="first")
     self.assertTrue(os.path.isfile(created))
     filename = os.path.basename(created)
     self.assertTrue(filename.startswith("first"))
     os.remove(created)
Ejemplo n.º 3
0
 def test_tmp_file_3(self):
     # The file must be created inside the requested directory.
     created = tmp_file(directory=os.getcwd())
     self.assertTrue(os.path.isfile(created))
     parent = os.path.dirname(created)
     self.assertEqual(os.getcwd(), parent)
     os.remove(created)
Ejemplo n.º 4
0
 def test_tmp_file_2(self):
     # With delete=True the returned path must not exist as a file.
     path = tmp_file(delete=True)
     still_on_disk = os.path.isfile(path)
     self.assertFalse(still_on_disk)
Ejemplo n.º 5
0
 def test_tmp_file_1(self):
     # Called without arguments, tmp_file must return the path of an existing file.
     created = tmp_file()
     exists = os.path.isfile(created)
     self.assertTrue(exists)
     os.remove(created)
Ejemplo n.º 6
0
 def test_tmp_file_7(self):
     # With delete=True no file remains, but the stem still appears in the name.
     path = tmp_file(delete=True, stem="middle")
     self.assertFalse(os.path.isfile(path))
     filename = os.path.basename(path)
     self.assertTrue("middle" in filename)
Ejemplo n.º 7
0
 def test_tmp_file_6(self):
     # The requested suffix must terminate the generated file name.
     created = tmp_file(suffix="last")
     self.assertTrue(os.path.isfile(created))
     filename = os.path.basename(created)
     self.assertTrue(filename.endswith("last"))
     os.remove(created)
Ejemplo n.º 8
0
 def test_tmp_file_5(self):
     # The requested stem must appear somewhere in the generated file name.
     created = tmp_file(stem="middle")
     self.assertTrue(os.path.isfile(created))
     filename = os.path.basename(created)
     self.assertTrue("middle" in filename)
     os.remove(created)
Ejemplo n.º 9
0
    def submit_jobs(self, results, nproc=1, process_all=False, submit_qtype=None, submit_queue=False, monitor=None):
        """Run molecular replacement and refinement for each search result

        For every entry in ``results`` a run script is written to
        ``self.output_dir`` that chains the MR module, the refinement module
        and two FFT map calculations. The scripts are submitted through
        :obj:`Job` and, once the wait returns, the MR and refinement logs are
        parsed into :obj:`MrScore` objects.

        If a refinement log already exists for a result and its R-factors
        pass ``_mr_job_succeeded``, that single score is stored and the method
        returns immediately without submitting any job.

        Parameters
        ----------
        results : list
            Search results to process. Each entry must be an
            :obj:`AmoreRotationScore`, :obj:`PhaserRotationScore` or
            :obj:`LatticeSearchResult`. The container is iterated twice, so it
            must not be a one-shot iterator.
        nproc : int, optional
            Number of processors to use [default: 1]
        process_all : bool, optional
            If True, wait for the jobs without a success check. If False,
            ``mr_succeeded_log`` is passed to the wait call as
            ``check_success`` (presumably so that waiting stops after the
            first success - confirm against :obj:`Job`) [default: False]
        submit_qtype : str, optional
            The cluster submission queue type - currently support SGE and LSF
            [default: None]
        submit_queue : str, optional
            The queue to submit to on the cluster [default: False]
        monitor : optional
            Passed through unchanged to the job wait call [default: None]

        Returns
        -------
        None
            The scores are stored in ``self._search_results``

        Raises
        ------
        ValueError
            If an entry of ``results`` is not a recognised result container

        Notes
        -----
        Per result, the following paths below ``self.output_dir`` are handed
        to the MR and refinement modules: ``<pdb_code>_mr_output.pdb``,
        ``<pdb_code>_mr_output.mtz``, ``<pdb_code>_mr.log``,
        ``<pdb_code>_refinement_output.pdb``,
        ``<pdb_code>_refinement_output.mtz`` and ``<pdb_code>_ref.log``.

        """

        def _parse_mr_score(pdb_code, mr_logfile):
            # Build an MrScore holding the statistics parsed from the MR log
            score = MrScore(pdb_code=pdb_code)
            if self.mr_program == "molrep":
                mp = molrep_parser.MolrepParser(mr_logfile)
                score.molrep_score = mp.score
                score.molrep_tfscore = mp.tfscore
            elif self.mr_program == "phaser":
                pp = phaser_parser.PhaserParser(mr_logfile)
                score.phaser_tfz = pp.tfz
                score.phaser_llg = pp.llg
                score.phaser_rfz = pp.rfz
            return score

        if not os.path.isdir(self.output_dir):
            os.mkdir(self.output_dir)

        run_files = []
        sol_cont = SolventContent(self.cell_parameters, self.space_group)
        mat_prob = MatthewsProbability(self.cell_parameters, self.space_group)

        for result in results:
            mr_workdir = os.path.join(self.output_dir, result.pdb_code, 'mr', self.mr_program)
            mr_logfile = os.path.join(mr_workdir, '{0}_mr.log'.format(result.pdb_code))
            mr_pdbout = os.path.join(mr_workdir, '{0}_mr_output.pdb'.format(result.pdb_code))
            mr_hklout = os.path.join(mr_workdir, '{0}_mr_output.mtz'.format(result.pdb_code))

            ref_workdir = os.path.join(mr_workdir, 'refine')
            ref_hklout = os.path.join(ref_workdir, '{0}_refinement_output.mtz'.format(result.pdb_code))
            ref_logfile = os.path.join(ref_workdir, '{0}_ref.log'.format(result.pdb_code))
            ref_pdbout = os.path.join(ref_workdir, '{0}_refinement_output.pdb'.format(result.pdb_code))

            diff_mapout1 = os.path.join(ref_workdir, '{0}_refmac_2fofcwt.map'.format(result.pdb_code))
            diff_mapout2 = os.path.join(ref_workdir, '{0}_refmac_fofcwt.map'.format(result.pdb_code))

            # A refinement log left by an earlier run that already succeeded
            # short-circuits the whole submission
            if os.path.isfile(ref_logfile):
                rp = refmac_parser.RefmacParser(ref_logfile)
                if _mr_job_succeeded(rp.final_r_fact, rp.final_r_free):
                    score = _parse_mr_score(result.pdb_code, mr_logfile)
                    score.final_r_free = rp.final_r_free
                    score.final_r_fact = rp.final_r_fact
                    self._search_results = [score]
                    return

            if isinstance(result, (AmoreRotationScore, PhaserRotationScore)):
                pdb_struct = PdbStructure()
                pdb_struct.from_file(result.dat_path)
                mr_pdbin = os.path.join(self.output_dir, result.pdb_code + ".pdb")
                pdb_struct.save(mr_pdbin)
            elif isinstance(result, LatticeSearchResult):
                pdb_struct = PdbStructure()
                pdb_struct.from_file(result.pdb_path)
                mr_pdbin = result.pdb_path
            else:
                raise ValueError("Do not recognize result container")

            solvent_content = sol_cont.calculate_from_struct(pdb_struct)
            if solvent_content > 30:
                solvent_content, n_copies = mat_prob.calculate_content_ncopies_from_struct(pdb_struct)
            else:
                pdb_struct.keep_first_chain_only()
                # NOTE(review): for a LatticeSearchResult mr_pdbin is result.pdb_path,
                # so this save writes over the file the structure was read from - confirm intended
                pdb_struct.save(mr_pdbin)
                solvent_content, n_copies = mat_prob.calculate_content_ncopies_from_struct(pdb_struct)
                logger.debug(
                    "%s is predicted to be too large to fit in the unit "
                    "cell with a solvent content of at least 30 percent, "
                    "therefore MR will use only the first chain", result.pdb_code)

            mr_cmd = [
                CMD_PREFIX, "ccp4-python", "-m", self.mr_python_module, "-hklin", self.mtz, "-hklout", mr_hklout,
                "-pdbin", mr_pdbin, "-pdbout", mr_pdbout, "-logfile", mr_logfile, "-work_dir", mr_workdir, "-nmol",
                n_copies, "-sgalternative", self.sgalternative
            ]

            ref_cmd = [
                CMD_PREFIX, "ccp4-python", "-m", self.refine_python_module, "-pdbin", mr_pdbout, "-pdbout", ref_pdbout,
                "-hklin", mr_hklout, "-hklout", ref_hklout, "-logfile", ref_logfile, "-work_dir", ref_workdir,
                "-refinement_type", self.refine_type, "-ncyc", self.refine_cycles
            ]

            if self.mr_program == "molrep":
                mr_cmd += ["-space_group", self.space_group]

            elif self.mr_program == "phaser":
                mr_cmd += [
                    "-i",
                    self.i,
                    "-sigi",
                    self.sigi,
                    "-f",
                    self.f,
                    "-sigf",
                    self.sigf,
                    "-solvent",
                    solvent_content,
                    "-timeout",
                    self.timeout,
                ]

                if isinstance(result, LatticeSearchResult):
                    mr_cmd += ['-autohigh', 4.0, '-hires', 5.0]

            # ====
            # Create a run script - prefix __needs__ to contain mr_program so we can find log
            # Leave order of this as SGE does not like scripts with numbers as first char
            # ====
            prefix, stem = self.mr_program + "_", result.pdb_code

            fft_cmd1, fft_stdin1 = self.fft(ref_hklout, diff_mapout1, "2mfo-dfc")
            run_stdin_1 = tmp_file(directory=self.output_dir, prefix=prefix, stem=stem, suffix="_1.stdin")
            with open(run_stdin_1, 'w') as f_out:
                f_out.write(fft_stdin1)

            fft_cmd2, fft_stdin2 = self.fft(ref_hklout, diff_mapout2, "mfo-dfc")
            run_stdin_2 = tmp_file(directory=self.output_dir, prefix=prefix, stem=stem, suffix="_2.stdin")
            with open(run_stdin_2, 'w') as f_out:
                f_out.write(fft_stdin2)

            # CCP4_SCR is redirected for the duration of the script and then
            # set back to the value it had when the script was generated
            ccp4_scr = os.environ["CCP4_SCR"]
            tmp_dir = self.tmp_dir if self.tmp_dir else self.output_dir

            cmd = [
                [EXPORT, "CCP4_SCR=" + tmp_dir],
                mr_cmd + [os.linesep],
                ref_cmd + [os.linesep],
                fft_cmd1 + ["<", run_stdin_1, os.linesep],
                fft_cmd2 + ["<", run_stdin_2, os.linesep],
                [EXPORT, "CCP4_SCR=" + ccp4_scr],
            ]
            run_script = make_script(cmd, directory=self.output_dir, prefix=prefix, stem=stem)
            run_log = run_script.rsplit(".", 1)[0] + '.log'
            run_files.append((run_script, run_stdin_1, run_stdin_2, run_log, mr_pdbout, mr_logfile, ref_logfile))

        if not run_files:
            # Nothing to run: unpacking zip(*run_files) below would raise ValueError
            logger.debug("No search results provided, skipping %s Molecular Replacement", self.mr_program)
            self._search_results = []
            return

        if not self.mute:
            logger.info("Running %s Molecular Replacement", self.mr_program)
        run_scripts, _, _, _, mr_pdbouts, mr_logfiles, ref_logfiles = zip(*run_files)

        j = Job(submit_qtype)
        j.submit(
            run_scripts,
            directory=self.output_dir,
            nproc=nproc,
            name='simbad_mr',
            queue=submit_queue,
            permit_nonzero=True)

        # The wait interval grows with log(number of scripts) but is never below 5
        interval_in_seconds = max(int(numpy.log(len(run_scripts)) / 3), 5)
        if process_all:
            j.wait(interval=interval_in_seconds, monitor=monitor)
        else:
            j.wait(interval=interval_in_seconds, monitor=monitor, check_success=mr_succeeded_log)

        mr_results = []
        for result, mr_logfile, mr_pdbout, ref_logfile in zip(results, mr_logfiles, mr_pdbouts, ref_logfiles):
            # Results with any missing output are skipped rather than scored
            if not os.path.isfile(mr_logfile):
                logger.debug("Cannot find %s MR log file: %s", self.mr_program, mr_logfile)
                continue
            elif not os.path.isfile(ref_logfile):
                logger.debug("Cannot find %s refine log file: %s", self.mr_program, ref_logfile)
                continue
            elif not os.path.isfile(mr_pdbout):
                logger.debug("Cannot find %s output file: %s", self.mr_program, mr_pdbout)
                continue

            score = _parse_mr_score(result.pdb_code, mr_logfile)

            if self._dano is not None:
                # The anomalous map is best effort: failures are logged and
                # the score is kept without the DANO fields
                try:
                    anode = anomalous_util.AnodeSearch(self.mtz, self.output_dir, self.mr_program)
                    anode.run(result)
                    a = anode.search_results()
                    score.dano_peak_height = a.dano_peak_height
                    score.nearest_atom = a.nearest_atom
                except RuntimeError:
                    logger.debug("RuntimeError: Unable to create DANO map for: %s", result.pdb_code)
                except IOError:
                    logger.debug("IOError: Unable to create DANO map for: %s", result.pdb_code)

            # The existence of ref_logfile was already checked at the top of this iteration
            rp = refmac_parser.RefmacParser(ref_logfile)
            score.final_r_free = rp.final_r_free
            score.final_r_fact = rp.final_r_fact
            mr_results.append(score)

        self._search_results = mr_results
Ejemplo n.º 10
0
 def test_check_script_4(self):
     # A script file containing only "sleep 1" must make Job.check_script raise PyJobError.
     import os  # local import: only needed to clean up the temporary script

     ss = tmp_file(delete=False)
     try:
         with open(ss, "w") as f_out:
             f_out.write("sleep 1")
         self.assertRaises(PyJobError, Job.check_script, ss)
     finally:
         # Remove the script whether or not the assertion passes, so no file is left behind
         if os.path.isfile(ss):
             os.remove(ss)