Example #1
 def test_sub_4(self):
     directory = os.getcwd()
     jobs = [
         make_script([["sleep 5"], ['echo "file {0}"'.format(i)]],
                     directory=directory) for i in range(5)
     ]
     array_script, array_jobs = prep_array_script(jobs, directory,
                                                  SunGridEngine.TASK_ID)
     jobid = SunGridEngine.sub(array_script,
                               array=[1, 5],
                               log=os.devnull,
                               name=inspect.stack()[0][3],
                               shell="/bin/sh")
     while SunGridEngine.stat(jobid):
         time.sleep(1)
     for i, j in enumerate(jobs):
         f = j.replace(".sh", ".log")
         self.assertTrue(os.path.isfile(f))
         self.assertEqual("file {0}".format(i), open(f).read().strip())
         os.unlink(f)
     for f in jobs + [array_script, array_jobs]:
         os.unlink(f)
Example #2
 def test_rls_1(self):
     jobs = [make_script(["touch", "pyjob_rls_test_1"])]
     jobid = LoadSharingFacility.sub(jobs,
                                     hold=True,
                                     name=inspect.stack()[0][3],
                                     shell="/bin/sh")
     time.sleep(5)
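     # The job was submitted on hold; release it so LSF can schedule it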
     LoadSharingFacility.rls(jobid)
     start, timeout = time.time(), False
     while LoadSharingFacility.stat(jobid):
         # Don't wait too long, one minute, then fail
         if ((time.time() - start) // 60) >= 1:
             LoadSharingFacility.kill(jobid)
             timeout = True
         time.sleep(10)
     for f in jobs:
         os.unlink(f)
     if timeout:
         self.fail("Timeout")
     else:
         self.assertTrue(os.path.isfile('pyjob_rls_test_1'))
         os.unlink('pyjob_rls_test_1')
Example #3
 def test_sub_7(self):
     assert "PYJOB_ENV1" not in os.environ
     os.environ["PYJOB_ENV1"] = "pyjob_random1"
     jobs = [
         make_script(["echo $PYJOB_ENV1"], directory=os.getcwd())
         for _ in range(2)
     ]
     array_script, array_jobs = prep_array_script(jobs, os.getcwd(),
                                                  SunGridEngine.TASK_ID)
     jobid = SunGridEngine.sub(array_script,
                               array=[1, 2],
                               directory=os.getcwd(),
                               log=os.devnull,
                               name=inspect.stack()[0][3],
                               shell="/bin/sh")
     while SunGridEngine.stat(jobid):
         time.sleep(1)
     for i, j in enumerate(jobs):
         f = j.replace(".sh", ".log")
         self.assertTrue(os.path.isfile(f))
         self.assertEqual(os.environ["PYJOB_ENV1"], open(f).read().strip())
         os.unlink(f)
     for f in jobs + [array_script, array_jobs]:
         os.unlink(f)
     # Restore the environment so the assert at the top passes on re-runs
     del os.environ["PYJOB_ENV1"]
Example #4
    def submit_jobs(self, results, nproc=1, process_all=False, submit_qtype=None, submit_queue=False, monitor=None):
        """Submit jobs to run in serial or on a cluster

        Parameters
        ----------
        results : list
            Results from :obj:`AmoreRotationScore`, :obj:`PhaserRotationScore`
            or :obj:`LatticeSearchResult`
        nproc : int, optional
            Number of processors to use [default: 1]
        process_all : bool, optional
            Process all jobs rather than terminating MR after the first
            success [default: False]
        submit_qtype : str
            The cluster submission queue type - currently supported: SGE and LSF
        submit_queue : str
            The queue to submit to on the cluster
        monitor : str
            Monitor passed through to :obj:`Job.wait`

        Returns
        -------
        None
            The scored results are stored in :obj:`self._search_results`.
            Each job writes its MR output pdb/hkl/log files and refinement
            output pdb/hkl/log files to disk rather than returning them.

        """
        if not os.path.isdir(self.output_dir):
            os.mkdir(self.output_dir)

        run_files = []
        sol_cont = SolventContent(self.cell_parameters, self.space_group)
        mat_prob = MatthewsProbability(self.cell_parameters, self.space_group)
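        # sol_cont and mat_prob estimate the solvent content and the number of
        # copies to place for each search model in the loop below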

        for result in results:
            mr_workdir = os.path.join(self.output_dir, result.pdb_code, 'mr', self.mr_program)
            mr_logfile = os.path.join(mr_workdir, '{0}_mr.log'.format(result.pdb_code))
            mr_pdbout = os.path.join(mr_workdir, '{0}_mr_output.pdb'.format(result.pdb_code))
            mr_hklout = os.path.join(mr_workdir, '{0}_mr_output.mtz'.format(result.pdb_code))

            ref_workdir = os.path.join(mr_workdir, 'refine')
            ref_hklout = os.path.join(ref_workdir, '{0}_refinement_output.mtz'.format(result.pdb_code))
            ref_logfile = os.path.join(ref_workdir, '{0}_ref.log'.format(result.pdb_code))
            ref_pdbout = os.path.join(ref_workdir, '{0}_refinement_output.pdb'.format(result.pdb_code))

            diff_mapout1 = os.path.join(ref_workdir, '{0}_refmac_2fofcwt.map'.format(result.pdb_code))
            diff_mapout2 = os.path.join(ref_workdir, '{0}_refmac_fofcwt.map'.format(result.pdb_code))

            if os.path.isfile(ref_logfile):
                rp = refmac_parser.RefmacParser(ref_logfile)
                if _mr_job_succeeded(rp.final_r_fact, rp.final_r_free):
                    score = MrScore(pdb_code=result.pdb_code)

                    if self.mr_program == "molrep":
                        mp = molrep_parser.MolrepParser(mr_logfile)
                        score.molrep_score = mp.score
                        score.molrep_tfscore = mp.tfscore
                    elif self.mr_program == "phaser":
                        pp = phaser_parser.PhaserParser(mr_logfile)
                        score.phaser_tfz = pp.tfz
                        score.phaser_llg = pp.llg
                        score.phaser_rfz = pp.rfz

                    rp = refmac_parser.RefmacParser(ref_logfile)
                    score.final_r_free = rp.final_r_free
                    score.final_r_fact = rp.final_r_fact
                    self._search_results = [score]
                    return

            if isinstance(result, (AmoreRotationScore, PhaserRotationScore)):
                pdb_struct = PdbStructure()
                pdb_struct.from_file(result.dat_path)
                mr_pdbin = os.path.join(self.output_dir, result.pdb_code + ".pdb")
                pdb_struct.save(mr_pdbin)
            elif isinstance(result, LatticeSearchResult):
                pdb_struct = PdbStructure()
                pdb_struct.from_file(result.pdb_path)
                mr_pdbin = result.pdb_path
            else:
                raise ValueError("Do not recognize result container")

            solvent_content = sol_cont.calculate_from_struct(pdb_struct)
            if solvent_content > 30:
                solvent_content, n_copies = mat_prob.calculate_content_ncopies_from_struct(pdb_struct)
            else:
                pdb_struct.keep_first_chain_only()
                pdb_struct.save(mr_pdbin)
                solvent_content, n_copies = mat_prob.calculate_content_ncopies_from_struct(pdb_struct)
                msg = "%s is predicted to be too large to fit in the unit "\
                    + "cell with a solvent content of at least 30 percent, "\
                    + "therefore MR will use only the first chain"
                logger.debug(msg, result.pdb_code)

            mr_cmd = [
                CMD_PREFIX, "ccp4-python", "-m", self.mr_python_module, "-hklin", self.mtz, "-hklout", mr_hklout,
                "-pdbin", mr_pdbin, "-pdbout", mr_pdbout, "-logfile", mr_logfile, "-work_dir", mr_workdir, "-nmol",
                n_copies, "-sgalternative", self.sgalternative
            ]

            ref_cmd = [
                CMD_PREFIX, "ccp4-python", "-m", self.refine_python_module, "-pdbin", mr_pdbout, "-pdbout", ref_pdbout,
                "-hklin", mr_hklout, "-hklout", ref_hklout, "-logfile", ref_logfile, "-work_dir", ref_workdir,
                "-refinement_type", self.refine_type, "-ncyc", self.refine_cycles
            ]

            if self.mr_program == "molrep":
                mr_cmd += ["-space_group", self.space_group]

            elif self.mr_program == "phaser":
                mr_cmd += [
                    "-i",
                    self.i,
                    "-sigi",
                    self.sigi,
                    "-f",
                    self.f,
                    "-sigf",
                    self.sigf,
                    "-solvent",
                    solvent_content,
                    "-timeout",
                    self.timeout,
                ]

                if isinstance(result, LatticeSearchResult):
                    mr_cmd += ['-autohigh', 4.0, '-hires', 5.0]

            # ====
            # Create a run script - the prefix __needs__ to contain mr_program so we can find the log
            # Keep the prefix first: SGE does not like script names starting with a digit
            # ====
            prefix, stem = self.mr_program + "_", result.pdb_code

            fft_cmd1, fft_stdin1 = self.fft(ref_hklout, diff_mapout1, "2mfo-dfc")
            run_stdin_1 = tmp_file(directory=self.output_dir, prefix=prefix, stem=stem, suffix="_1.stdin")
            with open(run_stdin_1, 'w') as f_out:
                f_out.write(fft_stdin1)

            fft_cmd2, fft_stdin2 = self.fft(ref_hklout, diff_mapout2, "mfo-dfc")
            run_stdin_2 = tmp_file(directory=self.output_dir, prefix=prefix, stem=stem, suffix="_2.stdin")
            with open(run_stdin_2, 'w') as f_out:
                f_out.write(fft_stdin2)

            ccp4_scr = os.environ["CCP4_SCR"]
            tmp_dir = self.tmp_dir if self.tmp_dir else self.output_dir

            cmd = [
                [EXPORT, "CCP4_SCR=" + tmp_dir],
                mr_cmd + [os.linesep],
                ref_cmd + [os.linesep],
                fft_cmd1 + ["<", run_stdin_1, os.linesep],
                fft_cmd2 + ["<", run_stdin_2, os.linesep],
                [EXPORT, "CCP4_SCR=" + ccp4_scr],
            ]
            run_script = make_script(cmd, directory=self.output_dir, prefix=prefix, stem=stem)
            run_log = run_script.rsplit(".", 1)[0] + '.log'
            run_files += [(run_script, run_stdin_1, run_stdin_2, run_log, mr_pdbout, mr_logfile, ref_logfile)]

        if not self.mute:
            logger.info("Running %s Molecular Replacement", self.mr_program)
        run_scripts, _, _, _, mr_pdbouts, mr_logfiles, ref_logfiles = zip(*run_files)

        j = Job(submit_qtype)
        j.submit(
            run_scripts,
            directory=self.output_dir,
            nproc=nproc,
            name='simbad_mr',
            queue=submit_queue,
            permit_nonzero=True)

        interval = int(numpy.log(len(run_scripts)) / 3)
        interval_in_seconds = max(interval, 5)
        if process_all:
            j.wait(interval=interval_in_seconds, monitor=monitor)
        else:
            j.wait(interval=interval_in_seconds, monitor=monitor, check_success=mr_succeeded_log)

        mr_results = []
        for result, mr_logfile, mr_pdbout, ref_logfile in zip(results, mr_logfiles, mr_pdbouts, ref_logfiles):
            if not os.path.isfile(mr_logfile):
                logger.debug("Cannot find %s MR log file: %s", self.mr_program, mr_logfile)
                continue
            elif not os.path.isfile(ref_logfile):
                logger.debug("Cannot find %s refine log file: %s", self.mr_program, ref_logfile)
                continue
            elif not os.path.isfile(mr_pdbout):
                logger.debug("Cannot find %s output file: %s", self.mr_program, mr_pdbout)
                continue

            score = MrScore(pdb_code=result.pdb_code)

            if self.mr_program == "molrep":
                mp = molrep_parser.MolrepParser(mr_logfile)
                score.molrep_score = mp.score
                score.molrep_tfscore = mp.tfscore
            elif self.mr_program == "phaser":
                pp = phaser_parser.PhaserParser(mr_logfile)
                score.phaser_tfz = pp.tfz
                score.phaser_llg = pp.llg
                score.phaser_rfz = pp.rfz

            if self._dano is not None:
                try:
                    anode = anomalous_util.AnodeSearch(self.mtz, self.output_dir, self.mr_program)
                    anode.run(result)
                    a = anode.search_results()
                    score.dano_peak_height = a.dano_peak_height
                    score.nearest_atom = a.nearest_atom
                except RuntimeError:
                    logger.debug("RuntimeError: Unable to create DANO map for: %s", result.pdb_code)
                except IOError:
                    logger.debug("IOError: Unable to create DANO map for: %s", result.pdb_code)

            if os.path.isfile(ref_logfile):
                rp = refmac_parser.RefmacParser(ref_logfile)
                score.final_r_free = rp.final_r_free
                score.final_r_fact = rp.final_r_fact
            else:
                logger.debug("Cannot find %s log file: %s", self.refine_program, ref_logfile)
            mr_results += [score]

        self._search_results = mr_results
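
A minimal driver for submit_jobs() might look like the sketch below; the mr object and its construction are assumptions for illustration, while the keyword arguments and the _search_results attribute come from the code above.

    # Hedged usage sketch: 'mr' stands for whatever object owns submit_jobs();
    # only the keywords and _search_results are taken from the code above.
    mr.submit_jobs(rotation_results,    # AmoreRotationScore, PhaserRotationScore
                                        # or LatticeSearchResult instances
                   nproc=4,
                   process_all=False,   # stop as soon as one MR job succeeds
                   submit_qtype="SGE",  # SGE and LSF are supported
                   submit_queue="all.q")
    scores = mr._search_results         # populated by the method, not returned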
Example #5
    def comparison(self, models, structures):
        """
        Compare a list of model structures to a second list of reference structures

        Parameters
        ----------
        models : list
           List containing the paths to the model structure files
        structures : list
           List containing the paths to the reference structure files

        Returns
        -------
        entries : list
           List of TMscore data entries on a per-model basis

        """

        if len(models) < 1 or len(structures) < 1:
            msg = 'No model structures provided' if len(models) < 1 \
                else 'No reference structures provided'
            logger.critical(msg)
            raise RuntimeError(msg)

        elif len(structures) == 1:
            logger.info('Using single structure provided for all model comparisons')
            structures = [structures[0]] * len(models)

        elif len(models) != len(structures):
            msg = "Unequal number of models and structures!"
            logger.critical(msg)
            raise RuntimeError(msg)

        if self.method == "tmalign":
            pt = tm_parser.TMalignLogParser()
        elif self.method == "tmscore":
            pt = tm_parser.TMscoreLogParser()
        else:
            msg = "Invalid method selected: %s" % self.method
            logger.critical(msg)
            raise RuntimeError(msg)

        logger.info('Using algorithm: %s', self.method)
        logger.info('------- Evaluating decoys -------')
        data_entries, job_scripts, log_files = [], [], []
        for model_pdb, structure_pdb in zip(models, structures):
            model_name = os.path.splitext(os.path.basename(model_pdb))[0]
            structure_name = os.path.splitext(
                os.path.basename(structure_pdb))[0]
            stem = "_".join([model_name, structure_name, self.method])

            if os.path.isfile(model_pdb) and os.path.isfile(structure_pdb):
                data_entries.append(
                    [model_name, structure_name, model_pdb, structure_pdb])
                script = make_script(
                    [self.executable, model_pdb, structure_pdb],
                    prefix="tmscore_",
                    stem=stem,
                    directory=self.tmp_dir)
                job_scripts.append(script)
                log_files.append(os.path.splitext(script)[0] + ".log")
            else:
                if not os.path.isfile(model_pdb):
                    logger.warning("Cannot find: %s", model_pdb)
                if not os.path.isfile(structure_pdb):
                    logger.warning("Cannot find: %s", structure_pdb)

        logger.info('Executing TManalysis scripts')
        j = Job(self._qtype)
        j.submit(job_scripts,
                 nproc=self._nproc,
                 max_array_jobs=self._max_array_jobs,
                 queue=self._queue,
                 name="tmscore")
        j.wait(interval=1)

        self.entries = []
        for entry, log, script in zip(data_entries, log_files, job_scripts):
            try:
                pt.reset()
                pt.parse(log)
            except Exception:
                logger.critical("Error processing the %s log file: %s",
                                self.method, log)
                log = "None"
            model_name, structure_name, model_pdb, structure_pdb = entry
            _entry = self._store(model_name, structure_name, model_pdb,
                                 structure_pdb, log, pt)
            self.entries.append(_entry)
            os.unlink(script)

        return self.entries
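
For context, comparison() might be driven as in this sketch; the scorer object is hypothetical, standing in for whatever class owns the method and supplies self.method, self.executable and the queue settings used above.

    # Hedged usage sketch: 'scorer' is hypothetical; comparison() reads its
    # configuration (method, executable, tmp_dir, queue settings) from self.
    models = ["decoy_1.pdb", "decoy_2.pdb"]
    references = ["native.pdb"]  # a single reference is reused for every model
    entries = scorer.comparison(models, references)
    for entry in entries:        # one TMscore data entry per model
        print(entry)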
Example #6
def create_morda_db(database, nproc=2, submit_qtype=None, submit_queue=False, chunk_size=5000):
    """Create the MoRDa search database

    Parameters
    ----------
    database : str
       The path to the database folder
    nproc : int, optional
       The number of processors [default: 2]
    submit_qtype : str
       The cluster submission queue type - currently supported: SGE and LSF
    submit_queue : str
       The queue to submit to on the cluster
    chunk_size : int, optional
       The number of jobs to submit at the same time [default: 5000]

    Raises
    ------
    RuntimeError
       Windows is currently not supported

    """
    if CUSTOM_PLATFORM == "windows":
        msg = "Windows is currently not supported"
        raise RuntimeError(msg)

    if not is_valid_db_location(database):
        raise RuntimeError("Permission denied! Cannot write to {}!".format(os.path.dirname(database)))

    if "MRD_DB" in os.environ:
        morda_installed_through_ccp4 = True
    else:
        download_morda()
        morda_installed_through_ccp4 = False

    morda_dat_path = os.path.join(os.environ['MRD_DB'], 'home', 'ca_DOM', '*.dat')
    simbad_pdb_path = os.path.join(database, '**', '*.pdb')
    morda_dat_files = set([os.path.basename(f) for f in glob.glob(morda_dat_path)])
    simbad_dat_files = set([os.path.basename(f).split('.')[0] + '.dat' for f in glob.glob(simbad_pdb_path)])
    # Keep .dat basenames here so the set difference against morda_dat_files works;
    # the corresponding files on disk are stored as .pdb (see erroneous_paths below)
    erroneous_files = set(["1bbzA_0.dat", "1gt0D_0.dat", "1h3oA_0.dat", "1kskA_1.dat", "1l0sA_0.dat"])

    def delete_erroneous_files(erroneous_paths):
        for f in erroneous_paths:
            if os.path.isfile(f):
                logger.warning("File flagged to be erroneous ... " + "removing from database: %s", f)
                os.remove(f)

    erroneous_paths = [os.path.join(database, name[1:3], name.replace('.dat', '.pdb')) for name in erroneous_files]
    delete_erroneous_files(erroneous_paths)

    dat_files = list(morda_dat_files - simbad_dat_files - erroneous_files)
    if len(dat_files) < 1:
        logger.info('SIMBAD database up-to-date')
        if not morda_installed_through_ccp4:
            shutil.rmtree(os.environ["MRD_DB"])
        leave_timestamp(os.path.join(database, 'simbad_morda.txt'))
        return
    else:
        logger.info("%d new entries were found in the MoRDa database, updating SIMBAD database", len(dat_files))

    exe = os.path.join(os.environ["MRD_PROG"], "get_model")

    run_dir = tmp_dir(directory=os.getcwd())

    # Submit in chunks, so we don't take too much disk space
    # and can terminate without losing the processed data
    total_chunk_cycles = len(dat_files) // chunk_size + (len(dat_files) % chunk_size > 0)
    for cycle, i in enumerate(range(0, len(dat_files), chunk_size)):
        logger.info("Working on chunk %d out of %d", cycle + 1, total_chunk_cycles)
        chunk_dat_files = dat_files[i:i + chunk_size]

        # Create the database files
        what_to_do = []
        for f in chunk_dat_files:
            code = os.path.basename(f).rsplit('.', 1)[0]
            final_file = os.path.join(database, code[1:3], code + '.pdb')
            # We need a temporary directory within because "get_model" uses non-unique file names
            tmp_d = tmp_dir(directory=run_dir)
            get_model_output = os.path.join(tmp_d, code + ".pdb")
            script = make_script(
                [["export CCP4_SCR=" + tmp_d], ["export MRD_DB=" + os.environ['MRD_DB']], ["cd", tmp_d],
                 [exe, "-c", code, "-m", "d"]],
                directory=tmp_d)
            log = script.rsplit('.', 1)[0] + '.log'
            what_to_do += [(script, log, tmp_d, (get_model_output, final_file))]

        scripts, _, tmps, files = zip(*what_to_do)
        j = Job(submit_qtype)
        j.submit(scripts, name='morda_db', nproc=nproc, queue=submit_queue)
        j.wait()

        sub_dir_names = set([os.path.basename(f).rsplit('.', 1)[0][1:3] for f in chunk_dat_files])
        for sub_dir_name in sub_dir_names:
            sub_dir = os.path.join(database, sub_dir_name)
            if not os.path.isdir(sub_dir):
                os.makedirs(sub_dir)

        for output, final in files:
            if os.path.isfile(output):
                shutil.move(output, final)
            else:
                logger.critical("File missing: {}".format(output))

        for d in tmps:
            shutil.rmtree(d)

    shutil.rmtree(run_dir)
    if not morda_installed_through_ccp4:
        shutil.rmtree(os.environ["MRD_DB"])

    validate_compressed_database(database)
    leave_timestamp(os.path.join(database, 'simbad_morda.txt'))
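
A typical invocation of create_morda_db might look as follows; the import path is an assumption, since the enclosing module is not shown here.

    # Hedged usage sketch: only the call signature is taken from the function
    # above; the import path is a guess and may differ in the real package.
    from simbad.command_line.simbad_database import create_morda_db

    create_morda_db("/data/simbad/morda_db",  # database folder to populate
                    nproc=4,
                    submit_qtype="SGE",       # SGE and LSF are supported
                    submit_queue="all.q",
                    chunk_size=5000)          # jobs submitted per batch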
Example #7
def create_contaminant_db(database, add_morda_domains, nproc=2, submit_qtype=None, submit_queue=False):
    """Create a contaminant database

    Parameters
    ----------
    database : str
        The path to the database folder
    add_morda_domains : bool
        Retrospectively add MoRDa domains to a contaminant database that was updated while MoRDa was not installed
    nproc : int, optional
        The number of processors [default: 2]
    submit_qtype : str
        The cluster submission queue type - currently supported: SGE and LSF
    submit_queue : str
        The queue to submit to on the cluster

    Raises
    ------
    RuntimeError
        dimple.contaminants.prepare module not available
    RuntimeError
        Windows is currently not supported
    """
    if not is_valid_db_location(database):
        raise RuntimeError("Permission denied! Cannot write to {}!".format(os.path.dirname(database)))

    try:
        import dimple.main
    except ImportError:
        msg = "dimple.contaminants.prepare module not available"
        raise RuntimeError(msg)

    logger.info('DIMPLE version: %s', dimple.main.__version__)

    if StrictVersion(dimple.main.__version__) < StrictVersion('2.5.7'):
        msg = "This feature requires dimple version 2.5.7 or later"
        raise RuntimeError(msg)

    if CUSTOM_PLATFORM == "windows":
        msg = "Windows is currently not supported"
        raise RuntimeError(msg)

    import dimple.contaminants.prepare

    dimple.contaminants.prepare.main(verbose=False)

    simbad_dat_path = os.path.join(database, '*', '*', '*', '*.dat')
    existing_dat_files = [os.path.basename(f).split('.')[0].lower() for f in glob.iglob(simbad_dat_path)]
    erroneous_files = ['4v43']
    dimple_files = ['cached', 'data.json', 'data.py']

    with open("data.json") as data_file:
        data = json.load(data_file)

    results = []
    for child in data["children"]:
        try:
            for child_2 in child["children"]:
                space_group = child_2["name"].replace(" ", "")
                for child_3 in child_2["children"]:
                    pdb_code = child_3["name"].split()[0].lower()
                    if (pdb_code in existing_dat_files or pdb_code in erroneous_files) and not add_morda_domains:
                        continue
                    uniprot_name = child["name"]
                    uniprot_mnemonic = uniprot_name.split('_')[1]
                    score = ContaminantSearchResult(pdb_code, space_group, uniprot_name, uniprot_mnemonic)
                    results.append(score)
        except KeyError:
            pass

    if len(results) == 0:
        logger.info("Contaminant database up to date")
    else:
        if add_morda_domains:
            logger.info("Adding morda domains to contaminant database")
        else:
            logger.info("%d new entries were found in the contaminant database, " + "updating SIMBAD database",
                        len(results))

        if "MRD_DB" in os.environ:
            morda_installed_through_ccp4 = True
        else:
            morda_installed_through_ccp4 = False

        if add_morda_domains and not morda_installed_through_ccp4:
            logger.critical("Morda not installed locally, unable to add morda domains to contaminant database")

        if morda_installed_through_ccp4:
            morda_dat_path = os.path.join(os.environ['MRD_DB'], 'home', 'ca_DOM', '*.dat')
            morda_dat_files = set([os.path.basename(f) for f in glob.iglob(morda_dat_path)])
            exe = os.path.join(os.environ['MRD_PROG'], "get_model")
        else:
            logger.info(
                "Morda not installed locally, therefore morda domains will not be added to contaminant database")

        what_to_do = []
        for result in results:
            stem = os.path.join(os.getcwd(), database, result.uniprot_mnemonic, result.uniprot_name, result.space_group)
            if not os.path.exists(stem):
                os.makedirs(stem)

            content = PdbStructure.get_pdb_content(result.pdb_code)
            if content is None:
                logger.debug("Encountered a problem downloading PDB %s - skipping entry", result.pdb_code)
            else:
                dat_content = simbad.db._str_to_dat(content)
                with open(os.path.join(stem, result.pdb_code + ".dat"), "w") as f_out:
                    f_out.write(dat_content)

                if not simbad.db.is_valid_dat(os.path.join(stem, result.pdb_code + ".dat")):
                    logger.debug("Unable to convert %s to dat file", result.pdb_code)

            if morda_installed_through_ccp4:
                for dat_file in morda_dat_files:
                    if result.pdb_code.lower() == dat_file[0:4]:
                        stem = os.path.join(database, result.uniprot_mnemonic, result.uniprot_name, result.space_group,
                                            "morda")
                        if not os.path.exists(stem):
                            os.makedirs(stem)
                        code = dat_file.rsplit('.', 1)[0]
                        final_file = os.path.join(stem, dat_file)
                        tmp_d = tmp_dir(directory=os.getcwd())
                        get_model_output = os.path.join(tmp_d, code + ".pdb")
                        script = make_script(
                            [["export CCP4_SCR=" + tmp_d], ["cd", tmp_d], [exe, "-c", code, "-m", "d"]], directory=tmp_d)
                        log = script.rsplit('.', 1)[0] + '.log'
                        what_to_do += [(script, log, tmp_d, (get_model_output, final_file))]

        if len(what_to_do) > 0:
            scripts, _, tmps, files = zip(*what_to_do)
            j = Job(submit_qtype)
            j.submit(scripts, name='cont_db', nproc=nproc, queue=submit_queue)
            j.wait()

            for output, final in files:
                if os.path.isfile(output):
                    simbad.db.convert_pdb_to_dat(output, final)
                else:
                    print "File missing: {}".format(output)

            for d in tmps:
                shutil.rmtree(d)

            for f in dimple_files:
                if os.path.isdir(f):
                    shutil.rmtree(f)
                elif os.path.isfile(f):
                    os.remove(f)

    validate_compressed_database(database)
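
A matching sketch for create_contaminant_db; again, the import path is an assumption rather than the package's confirmed layout.

    # Hedged usage sketch: call signature from the function above; the import
    # path is a guess and may differ in the real package.
    from simbad.command_line.simbad_database import create_contaminant_db

    create_contaminant_db("/data/simbad/cont_db",
                          add_morda_domains=False,  # True requires a local MoRDa (MRD_DB set)
                          nproc=4,
                          submit_qtype="SGE",
                          submit_queue="all.q")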