Ejemplo n.º 1
0
 def test_21(self):
     """Assigning ``content`` replaces earlier lines and the Script is not the assigned list object."""
     replacement = ["what the hell"]
     subject = Script()
     subject.append("test line")
     subject.content = replacement
     # Equal by value to the new content ...
     assert subject == ["what the hell"]
     # ... but a distinct object from the list that was assigned.
     assert subject is not replacement
Ejemplo n.º 2
0
def write_jobscript(name,
                    keyword_file,
                    amoptd,
                    directory=None,
                    job_time=86400,
                    extra_options=None):
    """Create and write the script that runs MrBUMP for one job.

    Parameters
    ----------
    name : str
        Stem of the script file; also passed to ``mrbump_cmd.mrbump_cmd``
    keyword_file : str
        Passed through to ``mrbump_cmd.mrbump_cmd``
    amoptd : dict
        Options dictionary; the ``'mtz'`` and ``'mr_sequence'`` entries are read
    directory : str, optional
        Directory for the script [default: current working directory]
    job_time : int, optional
        Not used by this function; kept for interface compatibility
    extra_options : dict, optional
        Not used by this function; kept for interface compatibility

    Returns
    -------
    Script
        The script object, after ``write()`` has been called on it

    """
    # A ``{}`` default would be one dict shared by every call; use None as
    # the sentinel and build a fresh dict per call instead.
    if extra_options is None:
        extra_options = {}

    directory = directory or os.getcwd()

    # Next the script to run mrbump
    script = Script(directory=directory,
                    prefix="",
                    stem=name,
                    suffix=ample_util.SCRIPT_EXT)
    if not sys.platform.startswith("win"):
        # Make sure the CCP4 scratch directory exists before mrbump runs
        script.append('[[ ! -d $CCP4_SCR ]] && mkdir $CCP4_SCR\n\n')

    # Get the mrbump command-line
    jobcmd = mrbump_cmd.mrbump_cmd(name, amoptd['mtz'], amoptd['mr_sequence'],
                                   keyword_file)
    script.append(jobcmd)

    # Write script
    script.write()
    logger.debug("Wrote MRBUMP script: %s", script.path)

    return script
Ejemplo n.º 3
0
 def test_read_2(self):
     """Reading a one-line ``.py`` file gives an empty shebang and that line as content."""
     with tempfile.NamedTemporaryFile(mode="w", delete=False,
                                      suffix=".py") as handle:
         handle.write('print("PyJob is cool!")\n')
     source_path = handle.name
     parsed = Script.read(source_path)
     assert parsed.shebang == ""
     assert parsed.content == ['print("PyJob is cool!")']
     # delete=False above, so the file has to be removed explicitly
     pytest.helpers.unlink([source_path])
Ejemplo n.º 4
0
 def test_read_3(self):
     """Reading an empty shell-suffixed file gives an empty shebang and no content."""
     with tempfile.NamedTemporaryFile(
             mode="w", delete=False,
             suffix=ScriptProperty.SHELL.suffix) as handle:
         empty_path = handle.name
     parsed = Script.read(empty_path)
     assert parsed.shebang == ""
     assert parsed.content == []
     # delete=False above, so the file has to be removed explicitly
     pytest.helpers.unlink([empty_path])
Ejemplo n.º 5
0
def get_py_script(i, target):
    """Build a Python Script holding the source of ``fibonacci`` plus a main guard.

    ``i`` is used in the script stem (``test<i>``) and ``target`` is written
    into the generated ``__main__`` section.
    """
    interpreter_line = "#!" + sys.executable
    script = Script(shebang=interpreter_line,
                    prefix="pyjob",
                    stem="test" + "{}".format(i),
                    suffix=".py")

    source_lines = inspect.getsource(fibonacci).splitlines()
    script.extend(source_lines)
    # Drop the first source line (the decorator, per the original comment)
    script.pop(0)

    main_section = ["if __name__ == '__main__':"]
    main_section.append("\ttarget = {}".format(target))
    main_section.append(
        "\tprint('{}th fib is: {}'.format(target, fibonacci(target)))")
    script.extend(main_section)
    return script
Ejemplo n.º 6
0
 def test_read_7(self):
     """Check directory, prefix, stem and suffix of a Script read from a file in the cwd."""
     tmp_options = dict(
         mode="w",
         dir=".",
         delete=True,
         prefix="pyjob",
         suffix=ScriptProperty.SHELL.suffix,
     )
     # Read while the file still exists; it is deleted when the handle closes
     with tempfile.NamedTemporaryFile(**tmp_options) as handle:
         parsed = Script.read(handle.name)
     assert parsed.directory == os.getcwd()
     assert parsed.prefix == ""
     assert parsed.stem[:5] == "pyjob"
     assert parsed.suffix == ScriptProperty.SHELL.suffix
Ejemplo n.º 7
0
    def write_script(self, work_dir, name, args, testcase_type):
        """Build the Script that launches one test case.

        The first line is the executable followed by a line-continuation
        character; every entry of ``args`` then becomes one further line of
        space-joined, stringified values, also ending in the continuation
        character.
        """
        on_windows = sys.platform.startswith('win')
        if on_windows:
            linechar = "^"
        else:
            linechar = "\\"

        script = Script(directory=work_dir, stem=name)

        # Default launcher: $CCP4/bin/ample (plus script extension on Windows)
        test_exe = os.path.join(os.environ["CCP4"], "bin", "ample")
        if on_windows:
            test_exe = test_exe + ample_util.SCRIPT_EXT

        # Ensembler/modelling test cases run a module through ccp4-python
        if testcase_type == ENSEMBLER:
            ccp4_python = os.path.join(os.environ["CCP4"], "bin",
                                       "ccp4-python")
            test_exe = '{0} -m ample.ensembler'.format(ccp4_python)
        elif testcase_type == MODELLING:
            ccp4_python = os.path.join(os.environ["CCP4"], "bin",
                                       "ccp4-python")
            test_exe = '{0} -m ample.modelling'.format(ccp4_python)

        # All arguments need to be strings
        stringified = [map(str, group) for group in args]
        script.append(test_exe + " " + linechar)
        for group in stringified:
            script.append("{0} {1}".format(" ".join(group), linechar))

        return script
Ejemplo n.º 8
0
 def _create_runscript(self):
     """Build the ``lsf_``-prefixed run script for this task.

     One directive line is emitted per configured option.  With more than
     one script a ``.jobs`` file listing them is written next to the run
     script and an array job is configured; otherwise the single script is
     appended directly.
     """
     runscript = Script(
         directory=self.directory,
         prefix="lsf_",
         suffix=".script",
         stem=str(uuid.uuid1().int),
     )
     directive = self.__class__.SCRIPT_DIRECTIVE

     # Collect the optional flags first, in the order they must appear
     options = []
     if self.dependency:
         conditions = " && ".join([f"deps({d})" for d in self.dependency])
         options.append("-w {}".format(conditions))
     if self.directory:
         options.append(f"-cwd {self.directory}")
     if self.priority:
         options.append(f"-sp {self.priority}")
     if self.queue:
         options.append(f"-q {self.queue}")
     if self.runtime:
         options.append(f"-W {self.runtime}")
     if self.shell:
         options.append(f"-L {self.shell}")
     if self.nprocesses:
         options.append(f'-R "span[ptile={self.nprocesses}]"')
     if self.extra:
         options.append(" ".join(map(str, self.extra)))
     for option in options:
         runscript.append(directive + " " + option)

     if len(self.script) <= 1:
         runscript.append(directive + f" -J {self.name}")
         runscript.append(directive + f" -o {self.log[0]}")
         runscript.append(self.script[0])
         return runscript

     # Array job: the individual scripts are listed in a side-car file
     logf = runscript.path.replace(".script", ".log")
     jobsf = runscript.path.replace(".script", ".jobs")
     with open(jobsf, "w") as f_out:
         f_out.write("\n".join(self.script))
     array_spec = f"-J {self.name}[1-{len(self.script)}]%{self.max_array_size}"
     runscript.append(directive + " " + array_spec)
     runscript.append(directive + f" -o {logf}")
     runscript.extend(self.get_array_bash_extension(jobsf, 1))
     return runscript
Ejemplo n.º 9
0
def create_morda_db(database,
                    nproc=2,
                    submit_qtype=None,
                    submit_queue=False,
                    chunk_size=5000):
    """Create the MoRDa search database

    Parameters
    ----------
    database : str
       The path to the database folder
    nproc : int, optional
       The number of processors [default: 2]
    submit_qtype : str
       The cluster submission queue type - currently support SGE and LSF
    submit_queue : str
       The queue to submit to on the cluster
    chunk_size : int, optional
       The number of jobs to submit at the same time [default: 5000]

    Raises
    ------
    RuntimeError
       Windows is currently not supported
    RuntimeError
       The database location is not valid/writable

    """
    if CUSTOM_PLATFORM == "windows":
        msg = "Windows is currently not supported"
        raise RuntimeError(msg)

    if not is_valid_db_location(database):
        raise RuntimeError("Permission denied! Cannot write to {}!".format(
            os.path.dirname(database)))

    if "MRD_DB" in os.environ:
        morda_installed_through_ccp4 = True
    else:
        # NOTE(review): presumably download_morda() sets MRD_DB/MRD_PROG in
        # os.environ, since both are read below - confirm against the helper.
        download_morda()
        morda_installed_through_ccp4 = False

    morda_dat_path = os.path.join(os.environ['MRD_DB'], 'home', 'ca_DOM',
                                  '*.dat')
    simbad_dat_path = os.path.join(database, '**', '*.dat')
    morda_dat_files = {os.path.basename(f) for f in glob.glob(morda_dat_path)}
    simbad_dat_files = {
        os.path.basename(f) for f in glob.glob(simbad_dat_path)
    }
    erroneous_files = {
        "1bbzA_0.dat", "1gt0D_0.dat", "1h3oA_0.dat", "1kskA_1.dat",
        "1l0sA_0.dat"
    }

    def delete_erroneous_files(erroneous_paths):
        """Remove any of the given files that exist in the database."""
        for f in erroneous_paths:
            if os.path.isfile(f):
                logger.warning(
                    "File flagged to be erroneous ... " +
                    "removing from database: %s", f)
                os.remove(f)

    # Entries live in a sub directory named after characters 1-2 of the name
    erroneous_paths = [
        os.path.join(database, name[1:3], name) for name in erroneous_files
    ]
    delete_erroneous_files(erroneous_paths)

    dat_files = list(morda_dat_files - simbad_dat_files - erroneous_files)
    if not dat_files:
        logger.info('SIMBAD database up-to-date')
        if not morda_installed_through_ccp4:
            shutil.rmtree(os.environ["MRD_DB"])
        leave_timestamp(os.path.join(database, 'simbad_morda.txt'))
        return

    logger.info(
        "%d new entries were found in the MoRDa database, " +
        "updating SIMBAD database", len(dat_files))

    exe = os.path.join(os.environ["MRD_PROG"], "get_model")

    run_dir = tmp_dir(directory=os.getcwd())

    # Submit in chunks, so we don't take too much disk space
    # and can terminate without losing the processed data.
    # Ceiling division: the remainder must be taken against chunk_size
    # (not a fixed 5) for the reported total to match the loop below.
    total_chunk_cycles = (len(dat_files) + chunk_size - 1) // chunk_size
    for cycle, i in enumerate(range(0, len(dat_files), chunk_size)):
        logger.info("Working on chunk %d out of %d", cycle + 1,
                    total_chunk_cycles)
        chunk_dat_files = dat_files[i:i + chunk_size]

        # Create the database files
        tmp_dirs = []
        conversions = []  # (get_model output PDB, final .dat path) pairs
        collector = ScriptCollector(None)
        for f in chunk_dat_files:
            code = os.path.basename(f).rsplit('.', 1)[0]
            final_file = os.path.join(database, code[1:3], code + ".dat")
            # We need a temporary directory within because "get_model" uses non-unique file names
            tmp_d = tmp_dir(directory=run_dir)
            get_model_output = os.path.join(tmp_d, code + ".pdb")
            cmd = [["export CCP4_SCR=" + tmp_d],
                   ["export MRD_DB=" + os.environ['MRD_DB']], ["cd", tmp_d],
                   [exe, "-c", code, "-m", "d"]]
            script = Script(directory=tmp_d)
            for c in cmd:
                script.append(' '.join(map(str, c)))
            collector.add(script)
            tmp_dirs.append(tmp_d)
            conversions.append((get_model_output, final_file))

        submit_chunk(collector=collector,
                     run_dir=os.getcwd(),
                     nproc=nproc,
                     job_name='morda_db',
                     submit_qtype=submit_qtype,
                     submit_queue=submit_queue,
                     permit_nonzero=True,
                     monitor=None,
                     success_func=None)

        # Make sure every destination sub directory exists before converting
        sub_dir_names = {
            os.path.basename(f).rsplit('.', 1)[0][1:3] for f in chunk_dat_files
        }
        for sub_dir_name in sub_dir_names:
            sub_dir = os.path.join(database, sub_dir_name)
            if os.path.isdir(sub_dir):
                continue
            os.makedirs(sub_dir)

        for output, final in conversions:
            if os.path.isfile(output):
                simbad.db.convert_pdb_to_dat(output, final)
            else:
                logger.critical("File missing: {}".format(output))

        for d in tmp_dirs:
            shutil.rmtree(d)

    shutil.rmtree(run_dir)
    if not morda_installed_through_ccp4:
        shutil.rmtree(os.environ["MRD_DB"])

    validate_compressed_database(database)
    leave_timestamp(os.path.join(database, 'simbad_morda.txt'))
Ejemplo n.º 10
0
def create_ensemble_db(database,
                       pdb_db,
                       nproc=2,
                       submit_qtype=None,
                       submit_queue=False,
                       chunk_size=5000):
    """Create the SIMBAD ensemble database from the MoRDa domains

    For every MoRDa domain not yet in ``database`` a script is generated
    that extracts the model with ``get_model``, derives its sequence, runs
    ``mrbump`` against the local PDB copy and stores the resulting ensemble.

    Parameters
    ----------
    database : str
       The path to the database folder
    pdb_db : str
        The path to a local copy of the Protein Data Bank
    nproc : int, optional
       The number of processors [default: 2]
    submit_qtype : str
       The cluster submission queue type - currently support SGE and LSF
    submit_queue : str
       The queue to submit to on the cluster
    chunk_size : int, optional
       The number of jobs to submit at the same time [default: 5000]

    Raises
    ------
    RuntimeError
       Windows is currently not supported
    RuntimeError
       The database location is not valid/writable

    """
    if CUSTOM_PLATFORM == "windows":
        msg = "Windows is currently not supported"
        raise RuntimeError(msg)

    if not is_valid_db_location(database):
        raise RuntimeError("Permission denied! Cannot write to {}!".format(
            os.path.dirname(database)))

    if "MRD_DB" in os.environ:
        morda_installed_through_ccp4 = True
    else:
        # NOTE(review): presumably download_morda() sets MRD_DB/MRD_PROG in
        # os.environ, since both are read below - confirm against the helper.
        download_morda()
        morda_installed_through_ccp4 = False

    morda_dat_path = os.path.join(os.environ['MRD_DB'], 'home', 'ca_DOM',
                                  '*.dat')
    simbad_dat_path = os.path.join(database, '**', '*.dat')
    morda_dat_files = {os.path.basename(f) for f in glob.glob(morda_dat_path)}
    simbad_dat_files = {
        os.path.basename(f) for f in glob.glob(simbad_dat_path)
    }
    erroneous_files = {
        "1bbzA_0.dat", "1gt0D_0.dat", "1h3oA_0.dat", "1kskA_1.dat",
        "1l0sA_0.dat"
    }

    def delete_erroneous_files(erroneous_paths):
        """Remove any of the given files that exist in the database."""
        for f in erroneous_paths:
            if os.path.isfile(f):
                logger.warning(
                    "File flagged to be erroneous ... " +
                    "removing from database: %s", f)
                os.remove(f)

    # Entries live in a sub directory named after characters 1-2 of the name
    erroneous_paths = [
        os.path.join(database, name[1:3], name) for name in erroneous_files
    ]
    delete_erroneous_files(erroneous_paths)

    dat_files = list(morda_dat_files - simbad_dat_files - erroneous_files)
    if not dat_files:
        logger.info('SIMBAD ensemble database up-to-date')
        if not morda_installed_through_ccp4:
            shutil.rmtree(os.environ["MRD_DB"])
        # NOTE(review): this writes 'simbad_morda.txt' while the normal exit
        # below writes 'simbad_ensemble.txt' - looks like a copy-paste
        # leftover; confirm which timestamp file is intended.
        leave_timestamp(os.path.join(database, 'simbad_morda.txt'))
        return

    logger.info(
        "%d new entries were found in the MoRDa database, " +
        "updating SIMBAD ensemble database", len(dat_files))

    exe = os.path.join(os.environ["MRD_PROG"], "get_model")

    # Keyword input fed to mrbump through a here-document in each script
    mrbump_stdin = """
    MDLS True
    MDLC False
    MDLD False
    MDLP False
    MDLM False
    MDLU False
    CHECK False
    UPDATE False
    PICKLE False
    MRNUM 5
    SCOP False
    DEBUG False
    RLEVEL 100
    GESAMT_MERGE False
    USEE True
    GESE True
    GEST True
    AMPT False
    DOPHMMER True
    DOHHPRED False
    PDBLOCAL {}
    END
    """.format(pdb_db)

    run_dir = tmp_dir(directory=os.getcwd())

    # Generate the sub directories in advance
    sub_dir_names = {
        os.path.basename(f).rsplit('.', 1)[0][1:3] for f in dat_files
    }
    for sub_dir_name in sub_dir_names:
        sub_dir = os.path.join(database, sub_dir_name)
        if os.path.isdir(sub_dir):
            continue
        os.makedirs(sub_dir)

    # Submit in chunks, so we don't take too much disk space
    # and can terminate without losing the processed data.
    # Ceiling division: the remainder must be taken against chunk_size
    # (not a fixed 5) for the reported total to match the loop below.
    total_chunk_cycles = (len(dat_files) + chunk_size - 1) // chunk_size
    for cycle, i in enumerate(range(0, len(dat_files), chunk_size)):
        logger.info("Working on chunk %d out of %d", cycle + 1,
                    total_chunk_cycles)
        chunk_dat_files = dat_files[i:i + chunk_size]

        # Create the database files
        tmp_dirs = []
        collector = ScriptCollector(None)
        for f in chunk_dat_files:
            code = os.path.basename(f).rsplit('.', 1)[0]
            final_file = os.path.join(database, code[1:3], code + ".dat")
            # We need a temporary directory within because "get_model" uses non-unique file names
            tmp_d = tmp_dir(directory=run_dir)
            get_model_output = os.path.join(tmp_d, code + ".pdb")
            get_seq_output = os.path.join(tmp_d, code + ".seq")
            mrbump_directory = os.path.join(tmp_d, 'search_mrbump_1')

            get_sequence_py = (
                "'import simbad.util; "
                "simbad.util.get_sequence(\"{0}\", \"{1}\")'").format(
                    get_model_output, get_seq_output)
            get_ensemble_py = (
                "'import simbad.util; "
                "simbad.util.get_mrbump_ensemble(\"{0}\", \"{1}\")'").format(
                    mrbump_directory, final_file)

            # The export templates need a "{}" placeholder; without it
            # str.format() silently drops the value and the script exports
            # empty variables.
            cmd = [
                ["export CCP4_SCR={}".format(tmp_d)],
                ["export MRD_DB={}".format(os.environ['MRD_DB'])],
                ["cd", tmp_d],
                [exe, "-c", code, "-m", "d"],
                ['ccp4-python', '-c', get_sequence_py],
                ['mrbump', 'seqin', get_seq_output, '<< eof'],
                [mrbump_stdin],
                ['eof'],
                ['ccp4-python', '-c', get_ensemble_py],
            ]

            script = Script(directory=tmp_d)
            for c in cmd:
                script.append(' '.join(map(str, c)))
            collector.add(script)
            tmp_dirs.append(tmp_d)

        submit_chunk(collector=collector,
                     run_dir=os.getcwd(),
                     nproc=nproc,
                     job_name='ensemble_db',
                     submit_qtype=submit_qtype,
                     submit_queue=submit_queue,
                     permit_nonzero=True,
                     monitor=None,
                     success_func=None)

        for d in tmp_dirs:
            shutil.rmtree(d)

    shutil.rmtree(run_dir)
    if not morda_installed_through_ccp4:
        shutil.rmtree(os.environ["MRD_DB"])

    validate_compressed_database(database)
    leave_timestamp(os.path.join(database, 'simbad_ensemble.txt'))
Ejemplo n.º 11
0
    def comparison(self, models, structures):
        """
        Compare a list of model structures to a second list of reference structures

        Parameters
        ----------
        models : list
           List containing the paths to the model structure files
        structures : list
           List containing the paths to the reference structure files; a
           single entry is reused for every model

        Returns
        -------
        entries : list
           List of TMscore data entries on a per-model basis

        Raises
        ------
        RuntimeError
           No models or no reference structures provided
        RuntimeError
           Unequal number of models and structures
        RuntimeError
           ``self.method`` is neither "tmalign" nor "tmscore"

        """

        if len(models) < 1 or len(structures) < 1:
            msg = 'No model structures provided' if len(models) < 1 else 'No reference structures provided'
            logger.critical(msg)
            raise RuntimeError(msg)

        elif len(structures) == 1:
            logger.info('Using single structure provided for all model comparisons')
            # Repeat the single reference once per model (no xrange, so this
            # works on both Python 2 and 3)
            structures = [structures[0]] * len(models)

        elif len(models) != len(structures):
            msg = "Unequal number of models and structures!"
            logger.critical(msg)
            raise RuntimeError(msg)

        if self.method == "tmalign":
            pt = tm_parser.TMalignLogParser()
        elif self.method == "tmscore":
            pt = tm_parser.TMscoreLogParser()
        else:
            # Interpolate here: "fmt, arg" would build a tuple, not a message
            msg = "Invalid method selected: %s" % self.method
            logger.critical(msg)
            raise RuntimeError(msg)

        logger.info('Using algorithm: {0}'.format(self.method))
        logger.info('------- Evaluating decoys -------')
        data_entries, log_files, job_scripts = [], [], []
        collector = ScriptCollector(None)
        for model_pdb, structure_pdb in zip(models, structures):
            # Pairs with a missing file are reported and skipped
            model_found = os.path.isfile(model_pdb)
            structure_found = os.path.isfile(structure_pdb)
            if not (model_found and structure_found):
                if not model_found:
                    logger.warning("Cannot find: %s", model_pdb)
                if not structure_found:
                    logger.warning("Cannot find: %s", structure_pdb)
                continue

            model_name = os.path.splitext(os.path.basename(model_pdb))[0]
            structure_name = os.path.splitext(os.path.basename(structure_pdb))[0]
            stem = "_".join([model_name, structure_name, self.method])

            data_entries.append([model_name, structure_name, model_pdb, structure_pdb])
            script = Script(directory=self.tmp_dir, prefix="tmscore_", stem=stem)
            script.append(" ".join([self.executable, model_pdb, structure_pdb]))
            collector.add(script)
            # Keep the script path for log-name derivation and later clean-up
            job_scripts.append(script.path)
            log_files.append(os.path.splitext(script.path)[0] + ".log")

        logger.info('Executing TManalysis scripts')

        # The scripts are executed once, through the task factory below; the
        # earlier Job(...).submit(...) call referenced an undefined name and
        # duplicated this run.
        with TaskFactory(
                self._qtype,
                collector,
                name="tmscore",
                nprocesses=self._nproc,
                max_array_size=self._max_array_jobs,
                queue=self._queue,
                shell="/bin/bash",
        ) as task:
            task.run()
            task.wait(interval=1)

        self.entries = []
        for entry, log, script_path in zip(data_entries, log_files, job_scripts):
            try:
                pt.reset()
                pt.parse(log)
            except Exception:
                # Best effort: record the entry with a placeholder log name
                logger.critical("Error processing the %s log file: %s", self.method, log)
                log = "None"
            model_name, structure_name, model_pdb, structure_pdb = entry
            _entry = self._store(model_name, structure_name, model_pdb, structure_pdb, log, pt)
            self.entries.append(_entry)
            os.unlink(script_path)

        return self.entries
Ejemplo n.º 12
0
 def test_20(self):
     """Assigning ``content`` replaces lines appended earlier."""
     expected = ["what the hell"]
     subject = Script()
     subject.append("test line")
     subject.content = list(expected)
     assert subject == expected
Ejemplo n.º 13
0
 def test_19(self):
     """Constructing a Script with the suffix ``",x"`` must raise ValueError."""
     invalid_suffix = ",x"
     with pytest.raises(ValueError):
         Script(suffix=invalid_suffix)
Ejemplo n.º 14
0
 def test_16(self):
     """Constructing a Script with ``suffix=None`` must raise ValueError."""
     invalid_suffix = None
     with pytest.raises(ValueError):
         Script(suffix=invalid_suffix)
Ejemplo n.º 15
0
def create_contaminant_db(database,
                          add_morda_domains,
                          nproc=2,
                          submit_qtype=None,
                          submit_queue=False):
    """Create a contaminant database

    Parameters
    ----------
    database : str
        The path to the database folder
    add_morda_domains : bool
        Retrospectively add morda domains to a contaminant database updated when morda was not installed
    nproc : int, optional
        The number of processors [default: 2]
    submit_qtype : str
        The cluster submission queue type - currently support SGE and LSF
    submit_queue : str
        The queue to submit to on the cluster

    Raises
    ------
    RuntimeError
        The database location is not valid/writable
    RuntimeError
        The installed dimple version is older than 2.5.7
    RuntimeError
       Windows is currently not supported
    """
    if not is_valid_db_location(database):
        raise RuntimeError("Permission denied! Cannot write to {}!".format(
            os.path.dirname(database)))

    import dimple.main
    logger.info('DIMPLE version: %s', dimple.main.__version__)

    if StrictVersion(dimple.main.__version__) < StrictVersion('2.5.7'):
        msg = "This feature will be available with dimple version 2.5.7"
        raise RuntimeError(msg)

    if CUSTOM_PLATFORM == "windows":
        msg = "Windows is currently not supported"
        raise RuntimeError(msg)

    import dimple.contaminants.prepare

    # NOTE(review): presumably this produces the "data.json" read below
    # (and the other entries of dimple_files) in the cwd - confirm.
    dimple.contaminants.prepare.main(verbose=False)

    simbad_dat_path = os.path.join(database, '*', '*', '*', '*.dat')
    existing_dat_files = [
        os.path.basename(f).split('.')[0].lower()
        for f in glob.iglob(simbad_dat_path)
    ]
    erroneous_files = ['4v43']
    dimple_files = ['cached', 'data.json', 'data.py']

    with open("data.json") as data_file:
        data = json.load(data_file)

    # Walk the three-level tree: protein -> space group -> PDB entry
    results = []
    for child in data["children"]:
        try:
            for child_2 in child["children"]:
                space_group = child_2["name"].replace(" ", "")
                for child_3 in child_2["children"]:
                    pdb_code = child_3["name"].split()[0].lower()
                    if (pdb_code in existing_dat_files or pdb_code
                            in erroneous_files) and not add_morda_domains:
                        continue
                    uniprot_name = child["name"]
                    uniprot_mnemonic = uniprot_name.split('_')[1]
                    score = ContaminantSearchResult(pdb_code, space_group,
                                                    uniprot_name,
                                                    uniprot_mnemonic)
                    results.append(score)
        except KeyError:
            # Deliberate best effort: nodes lacking the expected keys are
            # skipped (results collected before the error are kept)
            pass

    if not results:
        logger.info("Contaminant database up to date")
    else:
        if add_morda_domains:
            logger.info("Adding morda domains to contaminant database")
        else:
            logger.info(
                "%d new entries were found in the contaminant database, " +
                "updating SIMBAD database", len(results))

        morda_installed_through_ccp4 = "MRD_DB" in os.environ

        if add_morda_domains and not morda_installed_through_ccp4:
            logger.critical(
                "Morda not installed locally, unable to add morda domains to contaminant database"
            )

        if morda_installed_through_ccp4:
            morda_dat_path = os.path.join(os.environ['MRD_DB'], 'home',
                                          'ca_DOM', '*.dat')
            morda_dat_files = {
                os.path.basename(f) for f in glob.iglob(morda_dat_path)
            }
            exe = os.path.join(os.environ['MRD_PROG'], "get_model")
        else:
            logger.info(
                "Morda not installed locally, therefore morda domains will not be added to contaminant database"
            )

        tmp_dirs = []
        conversions = []  # (get_model output PDB, final .dat path) pairs
        collector = ScriptCollector(None)
        for result in results:
            stem = os.path.join(os.getcwd(), database, result.uniprot_mnemonic,
                                result.uniprot_name, result.space_group)
            if not os.path.exists(stem):
                os.makedirs(stem)

            content = PdbStructure.get_pdb_content(result.pdb_code)
            if content is None:
                logger.debug(
                    "Encountered a problem downloading PDB %s - skipping entry",
                    result.pdb_code)
            else:
                dat_file_path = os.path.join(stem, result.pdb_code + ".dat")
                dat_content = simbad.db._str_to_dat(content)
                with open(dat_file_path, "w") as f_out:
                    f_out.write(dat_content)

                if not simbad.db.is_valid_dat(dat_file_path):
                    logger.debug("Unable to convert %s to dat file",
                                 result.pdb_code)

            if morda_installed_through_ccp4:
                for dat_file in morda_dat_files:
                    # MoRDa domain files start with the 4-character PDB code
                    if result.pdb_code.lower() == dat_file[0:4]:
                        stem = os.path.join(database, result.uniprot_mnemonic,
                                            result.uniprot_name,
                                            result.space_group, "morda")
                        if not os.path.exists(stem):
                            os.makedirs(stem)
                        code = dat_file.rsplit('.', 1)[0]
                        final_file = os.path.join(stem, dat_file)
                        tmp_d = tmp_dir(directory=os.getcwd())
                        get_model_output = os.path.join(tmp_d, code + ".pdb")
                        # Name and value must form one word: joining them
                        # with a space would export an empty CCP4_SCR
                        cmd = [["export CCP4_SCR=" + tmp_d], ["cd", tmp_d],
                               [exe, "-c", code, "-m", "d"]]
                        script = Script(directory=tmp_d)
                        for c in cmd:
                            script.append(' '.join(map(str, c)))
                        collector.add(script)
                        tmp_dirs.append(tmp_d)
                        conversions.append((get_model_output, final_file))

        if conversions:
            submit_chunk(collector=collector,
                         run_dir=os.getcwd(),
                         nproc=nproc,
                         job_name='cont_db',
                         submit_qtype=submit_qtype,
                         submit_queue=submit_queue,
                         permit_nonzero=True,
                         monitor=None,
                         success_func=None)

            for output, final in conversions:
                if os.path.isfile(output):
                    simbad.db.convert_pdb_to_dat(output, final)
                else:
                    # Report through the logger (the Python 2 print statement
                    # is a syntax error on Python 3)
                    logger.critical("File missing: {}".format(output))

            for d in tmp_dirs:
                shutil.rmtree(d)

            # NOTE(review): the dimple work files are only cleaned up when
            # morda scripts were run - confirm this is intended.
            for f in dimple_files:
                if os.path.isdir(f):
                    shutil.rmtree(f)
                elif os.path.isfile(f):
                    os.remove(f)

    validate_compressed_database(database)
Ejemplo n.º 16
0
 def example_function(option):
     """Return a Script in the current working directory that echoes ``option``."""
     echo_line = "echo {}".format(option)
     script = Script(directory=os.getcwd())
     script.append(echo_line)
     return script
Ejemplo n.º 17
0
 def _create_runscript(self):
     """Build the ``sge_``-prefixed run script for this task.

     A fixed set of directives is always written, followed by one directive
     per configured option.  With more than one script a ``.jobs`` file
     listing them is written next to the run script and an array job is
     configured; otherwise the single script is appended directly.
     """
     runscript = Script(
         directory=self.directory,
         prefix="sge_",
         suffix=".script",
         stem=str(uuid.uuid1().int),
     )
     directive = self.__class__.SCRIPT_DIRECTIVE

     # Mandatory flags first, then the optional ones in their fixed order
     options = ["-V", "-w e", "-j yes", f"-N {self.name}"]
     if self.dependency:
         held_jobs = ",".join(map(str, self.dependency))
         options.append(f"-hold_jid {held_jobs}")
     if self.priority:
         options.append(f"-p {self.priority}")
     if self.queue:
         options.append(f"-q {self.queue}")
     if self.runtime:
         options.append(f"-l h_rt={self.get_time(self.runtime)}")
     if self.shell:
         options.append(f"-S {self.shell}")
     if self.nprocesses and self.environment:
         options.append(f"-pe {self.environment} {self.nprocesses}")
     if self.directory:
         options.append(f"-wd {self.directory}")
     if self.extra:
         options.append(" ".join(map(str, self.extra)))
     for option in options:
         runscript.append(directive + " " + option)

     if len(self.script) <= 1:
         runscript.append(directive + f" -o {self.log[0]}")
         runscript.append(self.script[0])
         return runscript

     # Array job: the individual scripts are listed in a side-car file
     logf = runscript.path.replace(".script", ".log")
     jobsf = runscript.path.replace(".script", ".jobs")
     with open(jobsf, "w") as f_out:
         f_out.write("\n".join(self.script))
     array_spec = f"-t 1-{len(self.script)} -tc {self.max_array_size}"
     runscript.append(directive + " " + array_spec)
     runscript.append(directive + f" -o {logf}")
     runscript.extend(self.get_array_bash_extension(jobsf, 0))
     return runscript
Ejemplo n.º 18
0
    def generate_script(self, dat_model):
        """Write the shell script that runs a PHASER rotation search on one model.

        The script points ``CCP4_SCR`` at a per-model temporary directory,
        converts the model's ``.dat`` entry to a PDB file, runs
        ``simbad.rotsearch.phaser_rotation_search`` through ``ccp4-python``,
        removes the temporary directory and finally sets ``CCP4_SCR`` to
        ``self.ccp4_scr``.

        Parameters
        ----------
        dat_model
            Object providing ``pdb_code``, ``dat_path``, ``solvent`` and
            ``nmol`` for the search model.

        Returns
        -------
        tuple
            ``(script, (log_path, dat_path))`` where ``script`` has already
            been written to disk and ``log_path`` is the script path with its
            extension swapped for ``.log``.

        """
        code = dat_model.pdb_code
        logger.debug(
            "Generating script to perform PHASER rotation function on %s", code)

        # Per-model paths derived from the instance templates.
        pdb_model = self.template_model.format(code)
        conv_py = (
            "\"from simbad.db import convert_dat_to_pdb; "
            "convert_dat_to_pdb('{}', '{}')\""
        ).format(dat_model.dat_path, pdb_model)
        rot_log = os.path.join("$CCP4_SCR", "{0}_rot.log").format(code)
        tmp_dir = self.template_tmp_dir.format(code)

        # Command-line options handed to the rotation-search module, in order.
        options = (
            ("-eid", self.eid),
            ("-hklin", self.mtz),
            ("-f", self.mtz_labels.f),
            ("-sigf", self.mtz_labels.sigf),
            ("-i", self.mtz_labels.i),
            ("-sigi", self.mtz_labels.sigi),
            ("-pdbin", pdb_model),
            ("-logfile", rot_log),
            ("-solvent", dat_model.solvent),
            ("-nmol", dat_model.nmol),
            ("-work_dir", tmp_dir),
        )
        tokens = ["simbad.rotsearch.phaser_rotation_search"]
        for flag, value in options:
            tokens.append(flag)
            tokens.append(str(value))
        phaser_cmd = " ".join(tokens)

        # One tuple per script entry; the tokens of a tuple are joined by spaces.
        ccp4_python = "$CCP4/bin/ccp4-python"
        steps = (
            (EXPORT, "CCP4_SCR=" + tmp_dir),
            ("mkdir", "-p", "$CCP4_SCR\n"),
            (CMD_PREFIX, ccp4_python, "-c", conv_py, os.linesep),
            (CMD_PREFIX, ccp4_python, "-m", phaser_cmd, os.linesep),
            ("rm", "-rf", "$CCP4_SCR\n"),
            (EXPORT, "CCP4_SCR=" + self.ccp4_scr),
        )

        phaser_script = Script(directory=self.script_log_dir,
                               prefix="phaser_",
                               stem=code)
        for step in steps:
            phaser_script.append(" ".join(str(token) for token in step))

        # The log file sits next to the script, differing only in extension.
        phaser_log = phaser_script.path.rsplit(".", 1)[0] + ".log"
        phaser_files = (phaser_log, dat_model.dat_path)
        phaser_script.write()
        return phaser_script, phaser_files
Ejemplo n.º 19
0
    def generate_script(self, dat_model):
        """Write the shell script that runs an AMORE rotation search on one model.

        The script points ``CCP4_SCR`` at a per-model temporary directory,
        converts the model's ``.dat`` entry to a PDB file, runs the AMORE
        executable twice (first with the tabling input, then with the rotation
        function input, each fed through a here-document), greps the rotation
        log for the first ``SOLUTIONRCD`` line, removes the temporary directory
        and finally sets ``CCP4_SCR`` to ``self.ccp4_scr``.

        Parameters
        ----------
        dat_model
            Object providing ``pdb_code``, ``dat_path``, ``x``, ``y``, ``z``
            and ``intrad`` for the search model.

        Returns
        -------
        tuple
            ``(script, (log_path, dat_path))`` where ``script`` has already
            been written to disk and ``log_path`` is the script path with its
            extension swapped for ``.log``.

        """
        code = dat_model.pdb_code
        logger.debug(
            "Generating script to perform AMORE rotation function on %s", code)

        # Per-model file names, all derived from the instance templates.
        pdb_model = self.template_model.format(code)
        table1 = self.template_table1.format(code)
        hklpck1 = self.template_hklpck1.format(code)
        clmn0 = self.template_clmn0.format(code)
        clmn1 = self.template_clmn1.format(code)
        mapout = self.template_mapout.format(code)

        conv_py = (
            "\"from simbad.db import convert_dat_to_pdb; "
            "convert_dat_to_pdb('{}', '{}')\""
        ).format(dat_model.dat_path, pdb_model)

        # First AMORE invocation and its stdin.
        tab_cmd = (
            self.amore_exe,
            "xyzin1", pdb_model,
            "xyzout1", pdb_model,
            "table1", table1,
        )
        tab_stdin = self.tabfun_stdin_template.format(
            x=dat_model.x, y=dat_model.y, z=dat_model.z, a=90, b=90, c=120)

        # Second AMORE invocation and its stdin.
        rot_cmd = (
            self.amore_exe,
            "table1", table1,
            "HKLPCK1", hklpck1,
            "hklpck0", self.hklpck0,
            "clmn1", clmn1,
            "clmn0", clmn0,
            "MAPOUT", mapout,
        )
        rot_stdin = self.rotfun_stdin_template.format(
            shres=self.shres,
            intrad=dat_model.intrad,
            pklim=self.pklim,
            npic=self.npic,
            step=self.rotastep,
        )
        rot_log = self.template_rot_log.format(code)
        tmp_dir = self.template_tmp_dir.format(code)

        # One tuple per script entry; the tokens of a tuple are joined by spaces.
        # ``heredoc`` opens a here-document (closed by the "eof" entry) and
        # redirects the program's stdout to the path that follows it.
        heredoc = "<< eof >"
        steps = (
            (EXPORT, "CCP4_SCR=" + tmp_dir),
            ("mkdir", "-p", "$CCP4_SCR\n"),
            (CMD_PREFIX, "$CCP4/bin/ccp4-python", "-c", conv_py, os.linesep),
            tab_cmd + (heredoc, os.devnull),
            (tab_stdin,),
            ("eof",),
            (os.linesep,),
            rot_cmd + (heredoc, rot_log),
            (rot_stdin,),
            ("eof",),
            (os.linesep,),
            ("grep", "-m 1", "SOLUTIONRCD", rot_log, os.linesep),
            ("rm", "-rf", "$CCP4_SCR\n"),
            (EXPORT, "CCP4_SCR=" + self.ccp4_scr),
        )

        amore_script = Script(directory=self.script_log_dir,
                              prefix="amore_",
                              stem=code)
        for step in steps:
            amore_script.append(" ".join(str(token) for token in step))

        # The log file sits next to the script, differing only in extension.
        amore_log = amore_script.path.rsplit(".", 1)[0] + ".log"
        amore_files = (amore_log, dat_model.dat_path)
        amore_script.write()
        return amore_script, amore_files
Ejemplo n.º 20
0
 def _create_runscript(self):
     """Build the SLURM submission script for this task.

     Every scheduler option is emitted as one ``SCRIPT_DIRECTIVE`` line.
     When ``self.script`` holds more than one entry, the entries are written
     to a ``.jobs`` file next to the run script and the job is submitted as
     an array limited to ``self.max_array_size`` concurrent tasks; otherwise
     the single script is invoked directly and logs to ``self.log[0]``.

     Returns
     -------
     Script
         The populated run script. This method does not write it to disk;
         only the ``.jobs`` file is written here (array case).

     """
     directive = self.__class__.SCRIPT_DIRECTIVE
     suffix = ".script"
     runscript = Script(
         directory=self.directory,
         prefix="slurm_",
         suffix=suffix,
         stem=str(uuid.uuid1().int),
     )

     options = ["--export=ALL", f"--job-name={self.name}"]
     if self.dependency:
         job_ids = ":".join(map(str, self.dependency))
         options.append(f"--depend=afterok:{job_ids}")
     if self.queue:
         options.append(f"-p {self.queue}")
     if self.nprocesses:
         options.append(f"-n {self.nprocesses}")
     if self.directory:
         # The long option was renamed from --workdir to --chdir and the old
         # spelling was later removed from sbatch; the short form -D is
         # accepted by both old and new releases.
         options.append(f"-D {self.directory}")
     if self.runtime:
         options.append(f"-t {self.runtime}")
     if self.extra:
         options.append(" ".join(map(str, self.extra)))

     if len(self.script) > 1:
         # Strip only the trailing suffix: str.replace() would also rewrite
         # any ".script" that happens to occur in a directory name.
         base = runscript.path.rsplit(suffix, 1)[0]
         logf = base + ".log"
         jobsf = base + ".jobs"
         with open(jobsf, "w") as f_out:
             f_out.write("\n".join(self.script))
         options.append(f"--array=1-{len(self.script)}%{self.max_array_size}")
         options.append(f"-o {logf}")
         body = self.get_array_bash_extension(jobsf, 0)
     else:
         options.append(f"-o {self.log[0]}")
         body = [self.script[0]]

     runscript.extend(f"{directive} {option}" for option in options)
     runscript.extend(body)
     return runscript