Beispiel #1
0
def run_modeller(knowns = ('2B4C', '4NCO'), seq = 'CAP45',
                  num_mod = 1):
    """Build homology models with Modeller and report the best one by DOPE.

    The alignment file is looked up as
    ``../sequence_files/align<seq>_<known1>_<known2>....ali`` and template
    structures are read from ``../pdb_files/``.

    Args:
        knowns: Template code, or a list/tuple of template codes.
        seq: Code of the target sequence in the alignment file.
        num_mod: Index of the last model to build (models start at 1).

    Raises:
        IndexError: If no model was built successfully.
    """
    seq_dir = '../sequence_files/'
    pdb_dir = '../pdb_files/'

    # Five local worker processes share the model building.
    j = job()
    for _ in range(5):
        j.append(local_slave())

    # Modeller environment
    env = environ()

    # Silence Modeller's log output
    log.none()

    # Directory holding the template structures
    env.io.atom_files_directory = [pdb_dir]

    # Alignment file name: every template code is appended with a leading
    # underscore.  Tuples (including the default argument) are expanded the
    # same way as lists instead of being formatted via str().
    if isinstance(knowns, (list, tuple)):
        template_suffix = ''.join('_' + code for code in knowns)
    else:
        template_suffix = '_' + str(knowns)
    ali_file = seq_dir + 'align' + seq + template_suffix + '.ali'

    mod = automodel(env,
                    alnfile = ali_file,
                    knowns = knowns,
                    sequence = seq,
                    assess_methods = (assess.DOPE,))

    mod.starting_model = 1
    mod.ending_model = num_mod

    mod.use_parallel_job(j)
    mod.make()

    # Keep only the models that were built without a failure
    ok_models = [x for x in mod.outputs if x['failure'] is None]
    if not ok_models:
        raise IndexError('Modeller did not produce any successful model')

    # Rank the models by DOPE score (lowest first)
    ok_models.sort(key = lambda output: output['DOPE score'])

    # Report the top model
    m = ok_models[0]
    print("The best model {} with a dope of {}".format(m['name'], m['DOPE score']))
Beispiel #2
0
 def modeller_automodel(self, query: SeqRecord, results: Path,
                        num_align: int, atom_files_dir: Path):
     """Build one Modeller model per stored alignment of ``query``.

     The first ``num_align`` records of the array stored in ``results`` are
     processed.  For each record the Clustal alignment in ``r[-2][0]`` is
     parsed, gaps and missing residues are removed through the helper
     methods of this class, and the first ``*.B*.pdb`` file produced by
     Modeller is copied next to ``results`` as
     ``<query id>_<index>.pdb``.  Records whose output file already exists
     are skipped.  Failures for a single record are logged and do not stop
     the remaining records.

     Args:
         query: Query sequence record; its id must match the alignment.
         results: Path of the ``.npy`` file holding the search results.
         num_align: Maximum number of records to model.
         atom_files_dir: Root directory of the template ``.ent`` files,
             organised in sub-directories named after ``id[2:4]``.
     """
     from modeller import environ
     from modeller.automodel import automodel
     # The output directory does not depend on the record being processed.
     out_d = results.resolve().parent
     # NOTE(review): allow_pickle=True executes arbitrary pickled code;
     # only load result files from a trusted source.
     for model_index, r in enumerate(
             np.load(results, allow_pickle=True)[:num_align]):
         try:
             aln = AlignIO.read(StringIO(r[-2][0]), 'clustal')
         except Exception:
             # Catch Exception rather than everything so that
             # KeyboardInterrupt / SystemExit still stop the loop.
             logging.error(
                 f'Failed to parse alignment: {r[0]} -> {r[2]} -> {r[4]} -> {r[6]}'
             )
             continue
         assert query.id == aln[0].id and aln[-1].id == r[-3]
         q_rec, t_rec = self._remove_gaps(aln[0], aln[-1])
         try:
             t_rec = self._remove_missing_res(
                 t_rec, (atom_files_dir / aln[-1].id[2:4] /
                         f'{aln[-1].id}.ent').resolve().as_posix())
         except FileNotFoundError as e:
             logging.exception(e)
             continue
         # PIR header fields expected by Modeller are carried in the
         # record description; the name is blanked.
         q_rec.name, t_rec.name = '', ''
         q_rec.description = f'sequence:{q_rec.id}::::::::'
         t_rec.description = f'structureX:{t_rec.id}::{t_rec.id[-2].upper()}::{t_rec.id[-2].upper()}::::'
         aln = MultipleSeqAlignment([q_rec, t_rec])
         if (out_d / f'{aln[0].id}_{model_index+1}.pdb').exists():
             continue
         cwd = os.getcwd()
         # Modeller writes its files into the current directory, so each
         # run happens inside a throw-away directory.
         with tempfile.TemporaryDirectory() as tmpdir:
             try:
                 os.chdir(tmpdir)
                 AlignIO.write(aln, 'aln.pir', 'pir')
                 env = environ()
                 env.io.atom_files_directory = [
                     (atom_files_dir / aln[1].id[2:4]).resolve().as_posix()
                 ]
                 mod = automodel(env,
                                 'aln.pir',
                                 knowns=[aln[1].id],
                                 sequence=aln[0].id)
                 mod.make()
                 shutil.copy(
                     list(Path().glob('*.B*.pdb'))[0],
                     out_d / f'{aln[0].id}_{model_index+1}.pdb')
             except Exception as e:
                 logging.error(
                     f'knowns=[{aln[1].id}], sequence={aln[0].id}')
                 logging.exception(e)
             finally:
                 # Leave the temporary directory before it is removed.
                 os.chdir(cwd)
Beispiel #3
0
    def _automodel_run(self, alin_file, knowns):
        """Build a single model for the sequence 'target' and return its output.

        Args:
            alin_file: Alignment file passed to Modeller.
            knowns: Template code(s) present in the alignment.

        Returns:
            The first entry of ``automodel.outputs``.
        """
        amdl = automodel(self.env,
                         alnfile=alin_file,
                         knowns=knowns,
                         sequence='target',
                         assess_methods=(assess.DOPE, assess.GA341))
        amdl.starting_model = 1
        amdl.ending_model = 1

        # Run inside self.tmpdir, and always return to the previous working
        # directory, even when make() raises.
        orig_dir = os.getcwd()
        os.chdir(self.tmpdir)
        try:
            amdl.make()
        finally:
            os.chdir(orig_dir)

        return amdl.outputs[0]
Beispiel #4
0
def create_models(alnfile,
                  knownid,
                  sequenceid,
                  pdbfile,
                  model_number=5,
                  dir=''):
    """Build ``model_number`` Modeller models, optionally in a new sub-directory.

    Args:
        alnfile: Alignment file, relative to the current directory.
        knownid: Template code(s) in the alignment.
        sequenceid: Code of the target sequence in the alignment.
        pdbfile: Template structure file, relative to the current directory;
            copied into ``dir`` when ``dir`` is given.
        model_number: Index of the last model to build (models start at 1).
        dir: Name of a sub-directory to create and run in.  When empty the
            models are built in the current directory.

    Raises:
        SystemExit: If the ``modeller`` package cannot be imported.
        subprocess.CalledProcessError: If copying ``pdbfile`` fails.
    """
    # Check for Modeller first so that no directory is created (and the
    # working directory is not changed) when it is missing.
    try:
        from modeller import environ
        from modeller.automodel import automodel
    except ImportError:
        print(
            'Make Sure Python Modeller is installed. Double check License Key is in Modeller config.py file'
        )
        # Non-zero status: this is an error exit.
        sys.exit(1)

    cdir = os.getcwd()
    if dir:
        ndir = cdir + '/' + dir
        os.mkdir(ndir)
        os.chdir(ndir)
    try:
        if dir:
            # Argument list instead of a shell string: file names with
            # spaces or shell metacharacters are passed through verbatim.
            sp.check_call(['cp', '../' + pdbfile, pdbfile])
            alnfile = '../' + alnfile
        env = environ()
        a = automodel(env,
                      alnfile=alnfile,
                      knowns=knownid,
                      sequence=sequenceid)
        a.starting_model = 1
        a.ending_model = model_number
        a.make()
    finally:
        # Restore the caller's working directory even if modelling failed.
        if dir:
            os.chdir(cdir)
Beispiel #5
0
def model3D(fic, ALLPDB, pdb_extension='.cif'):
    """Build one homology model from the alignment file ``fic``.

    The first record returned by ``readFastaMul(fic)`` is the target; all
    following records are templates.  The code of a record is the second
    ``;``-separated field of its first header line.  For every template the
    structure file ``<code before '_', lower-cased><pdb_extension>`` is
    copied from ``ALLPDB`` into the current directory unless it is already
    there.

    Args:
        fic: Alignment file handed to ``readFastaMul`` and to Modeller.
        ALLPDB: Directory holding the template structure files.
        pdb_extension: Extension of the structure files.

    Returns:
        Always 1.

    Raises:
        ImportError: If ``_MODELLER_MESSAGE`` is not empty.
        Exception: If ``fic`` holds fewer than two records.
    """
    if _MODELLER_MESSAGE != "":
        raise ImportError(_MODELLER_MESSAGE)

    records = readFastaMul(fic)
    if len(records) < 2:
        raise Exception("There aren't template sequences in %s." % fic)

    def _record_code(record):
        # First header line, second ';'-separated field.
        header = record[0].split('\n')[0]
        return header.split(';')[1]

    seq = _record_code(records[0])

    # Verbose logging, then a fresh environment that reads structures from
    # the current directory.
    modeller.log.verbose()
    env = modeller.environ()
    env.io.atom_files_directory = ['.']

    template_codes = []
    for index in range(1, len(records)):
        code = _record_code(records[index])
        template_codes.append(code)
        structure_file = code.split('_')[0].lower() + pdb_extension
        if os.path.isfile(structure_file):
            continue
        # Fetch the missing structure from the repository directory.
        shutil.copy2(os.path.join(ALLPDB, structure_file), structure_file)

    a = automodel.automodel(env,
                            alnfile=fic,
                            knowns=tuple(template_codes),
                            sequence=seq)
    a.max_molpdf = 1e12
    # Build exactly one model (first index == last index).
    a.starting_model = 1
    a.ending_model = 1

    print("cdir = ", os.getcwd())
    a.make()

    return 1
Beispiel #6
0
    def run(self, seqId):
        """Model ``seqId`` with Modeller and return the model's absolute path.

        Template structures are read from ``self.template[0]``; the template
        code(s) are ``self.template[1]`` and the alignment file is
        ``self.pir``.  The alignment is recomputed with ``auto_align()``
        before the model is built.

        Asserts that ``self.check(seqId)`` is truthy and that the file
        ``<seqId>.B99990001.pdb`` exists after the run.
        """
        assert (self.check(seqId))
        from modeller import log, environ
        from modeller.automodel import automodel

        # Switch off every Modeller log channel.
        log.level(output=0, errors=0, notes=0, warnings=0, memory=0)

        template_dir, template_codes = self.template[0], self.template[1]
        environment = environ()
        environment.io.atom_files_directory = [template_dir]

        builder = automodel(environment,
                            alnfile=self.pir,
                            knowns=template_codes,
                            sequence=seqId)
        builder.auto_align()
        builder.make()

        model_file = '{}.B99990001.pdb'.format(seqId)
        assert (os.path.isfile(model_file))
        return os.path.abspath(model_file)
Beispiel #7
0
 def modeller_automodel(self, query: SeqRecord, results: Path,
                        num_align: int, atom_files_dir: Path):
     """Build one Modeller model per row of the hard-coded hits CSV.

     Every row of ``data/delta_new_hits_thresh10_xdug10_xdg10_xdgf10.csv``
     carries a BLAST XML record (``XML``) plus the ``Query`` and ``Hit``
     ids.  The first HSP alignment is converted to a PIR alignment and
     modelled in a temporary directory; the first ``*.B*.pdb`` file is
     copied to ``results`` as ``<query id>_<hit id>.pdb``.  Rows without an
     HSP are skipped.  A failure while modelling one row is logged and the
     loop continues.

     Args:
         query: Not used by this implementation.
         results: Output directory for the models.
         num_align: Not used by this implementation.
         atom_files_dir: Root directory of the template structures,
             organised in sub-directories named after ``id[2:4]``.
     """
     import logging

     from modeller import environ, log
     from modeller.automodel import automodel
     raw_df = pandas.read_csv(
         'data/delta_new_hits_thresh10_xdug10_xdg10_xdgf10.csv')
     # The output directory is the same for every row.
     out_d = results.resolve()
     for row in tqdm(raw_df.itertuples(), total=raw_df.shape[0]):
         try:
             aln = SearchIO.read(StringIO(row.XML), 'blast-xml').hsps[0].aln
         except IndexError:
             continue
         assert aln[0].id == row.Query and aln[1].id == row.Hit
         q_rec, t_rec = self._remove_gaps(aln[0], aln[-1])
         # PIR header fields expected by Modeller are carried in the
         # record description; the name is blanked.
         q_rec.name = ''
         t_rec.name = ''
         q_rec.description = f'sequence:{row.Query}::::::::'
         t_rec.description = f'structureX:{row.Hit}::{row.Hit[-2].upper()}::{row.Hit[-2].upper()}::::'
         aln = MultipleSeqAlignment([q_rec, t_rec])
         cwd = os.getcwd()
         # Modeller writes its files into the current directory, so each
         # run happens inside a throw-away directory.
         with tempfile.TemporaryDirectory() as tmpdir:
             try:
                 os.chdir(tmpdir)
                 AlignIO.write(aln, 'aln.pir', 'pir')
                 log.none()
                 env = environ()
                 env.io.atom_files_directory = [
                     (atom_files_dir / aln[1].id[2:4]).resolve().as_posix()
                 ]
                 mod = automodel(env,
                                 'aln.pir',
                                 knowns=[aln[1].id],
                                 sequence=aln[0].id)
                 mod.make()
                 shutil.copy(
                     list(Path().glob('*.B*.pdb'))[0],
                     out_d / f'{aln[0].id}_{aln[1].id}.pdb')
             except Exception:
                 # Still best-effort per row, but the failure is recorded
                 # and KeyboardInterrupt / SystemExit are no longer
                 # swallowed.
                 logging.exception(
                     'Modelling failed: knowns=[%s], sequence=%s',
                     aln[1].id, aln[0].id)
             finally:
                 # Leave the temporary directory before it is removed.
                 os.chdir(cwd)
Beispiel #8
0
def main(args):
    """Build ``args.num_models`` models and return the name of the best one.

    The alignment file is expected at
    ``<args.dir>/<target without .ali>-<template with .pdb replaced by .ali>``.

    Args:
        args: Namespace providing ``seed``, ``dir``, ``target``,
            ``template`` and ``num_models``.

    Returns:
        The ``name`` entry of the successfully built model with the lowest
        DOPE score.

    Raises:
        IndexError: If no model was built successfully.
    """

    def _swap_suffix(name, old, new):
        # Replace only a trailing `old`; str.replace would also rewrite
        # occurrences in the middle of the name.
        if name.endswith(old):
            return name[:-len(old)] + new
        return name

    mod.log.verbose()
    env = mod.environ(rand_seed=args.seed)
    env.io.atom_files_directory = [".", args.dir, "../" + args.dir]

    seq = _swap_suffix(args.target, ".ali", "")
    alnfile = os.path.join(
        args.dir, seq + "-" + _swap_suffix(args.template, ".pdb", ".ali"))

    # in order to use the soap assess method, refer to https://salilab.org/SOAP/ and
    # download the SOAP-Protein library file. Put this file in
    # your-installation-path/lib/modeller-9.25/modlib/
    # otherwise, just comment the respective line
    a = automodel.automodel(
        env,
        alnfile=alnfile,
        knowns=_swap_suffix(args.template, ".pdb", ""),
        sequence=seq,
        assess_methods=(
            automodel.assess.DOPE,
            automodel.assess.GA341,
            # soap_protein_od.Scorer(),
        ),
    )

    a.starting_model, a.ending_model = 1, args.num_models
    a.make()

    # keep the successfully built models from a.outputs
    models = [m for m in a.outputs if m["failure"] is None]
    if not models:
        raise IndexError("Modeller did not produce any successful model")
    key = "DOPE score"
    models.sort(key=lambda output: output[key])

    # print and return the top model (lowest DOPE score)
    top_model = models[0]
    print("Top model: %s (DOPE score %.3f)" % (top_model["name"], top_model[key]))
    return top_model["name"]
Beispiel #9
0
def process(req, rep):
    '''Processes a single request to the server, storing the result in `rep`.

    For every alignment in `req.alignments` the template structure and a
    two-entry alignment file are written to a private temporary directory,
    Modeller builds `FLAGS.models_per_alignment` models, and the successful
    ones are collected.  All collected models are then ranked by increasing
    DOPE score and added to `rep.selected` until
    `FLAGS.max_models_to_return` entries have been written.

    The temporary directory is removed and the working directory restored
    even when an error occurs.
    '''
    from modeller.automodel.assess import DOPE, GA341
    from modeller.automodel import automodel
    logger = logging.getLogger('modeller_server')
    logger.info('Processing job=%s, recipient=%s, alignments=%d' % (req.identifier, req.recipient, len(req.alignments)))

    # Populate required fields in the response
    rep.recipient = req.recipient
    rep.identifier = req.identifier

    # In order to prevent filename collisions, independent runs of modeller
    # are executed in separate directories
    curr_dir = os.getcwd()
    work_dir = tempfile.mkdtemp()
    try:
        os.chdir(work_dir)

        # N * M (model, alignment, template id, score) tuples, where
        # N = #alignments and M = #models per alignment.  The score stays
        # the last element because it is the sort key below.
        candidates = []
        for alignment in req.alignments:
            query_id = 'query'
            templ_id = str(alignment.templ_pdb + alignment.templ_chain)

            # Write template structure
            templ_file = templ_id + '.pdb'
            with open(templ_file, 'w') as file:
                file.write('%s\n' % alignment.templ_structure)

            # Write alignment
            alignment_file = templ_id + '.ali'
            with open(alignment_file, 'w') as file:
                params = { 'query_id'    : query_id,
                           'query_start' : alignment.query_start,
                           'query_stop'  : alignment.query_stop,
                           'query_align' : alignment.query_align }

                query_line = query_alignment.safe_substitute(params)
                file.write('%s\n' % query_line)

                params = { 'templ_id'    : templ_id,
                           'templ_pdb'   : templ_file,
                           'templ_chain' : alignment.templ_chain,
                           'templ_start' : alignment.templ_start,
                           'templ_stop'  : alignment.templ_stop,
                           'templ_align' : alignment.templ_align }

                templ_line = templ_alignment.safe_substitute(params)
                file.write('%s\n' % templ_line)

            # Run modeller
            modeller.log.verbose()
            env = modeller.environ()
            env.io.atom_files_directory = ['.']
            am = automodel(env,
                           alnfile = alignment_file,
                           knowns = templ_id,
                           sequence = query_id,
                           assess_methods = (DOPE, GA341))

            am.starting_model = 1
            am.ending_model = FLAGS.models_per_alignment
            am.make()

            # Rank successful predictions by DOPE score
            models = [x for x in am.outputs if x['failure'] is None]
            models.sort(key = lambda x: x['DOPE score'])
            logger.info('Produced %d models for alignment %s' % (len(models), templ_id))

            for model in models:
                with open(model['name']) as file:
                    coords = file.read()

                # Remember the template id with the model: the loop variable
                # would otherwise only hold the id of the last alignment.
                candidates.append((coords, alignment, templ_id, model['DOPE score']))

        # Sort all N * M candidate models in increasing order of DOPE score, returning the top K
        candidates.sort(key = operator.itemgetter(-1))
        for (i, entry) in enumerate(candidates):
            coords, alignment, templ_id, _score = entry
            selection = rep.selected.add()
            selection.rank = i + 1

            # Append alignment information to bottom of PDB file
            selection.model = coords
            selection.model += 'Source: %s\n' % alignment.method
            selection.model += 'Template: %s\n' % templ_id
            selection.model += 'Query alignment: %s\n' % alignment.query_align
            selection.model += 'Templ alignment: %s\n' % alignment.templ_align

            # Message types cannot be assigned directly (e.g. x.field = field).
            # For additional details, read the "Singular Message Fields" section in:
            # https://developers.google.com/protocol-buffers/docs/reference/python-generated#fields
            selection.alignment.ParseFromString(alignment.SerializeToString())
            if (selection.rank == FLAGS.max_models_to_return):
                break
    finally:
        # Always leave and delete the scratch directory, also on failure.
        os.chdir(curr_dir)
        shutil.rmtree(work_dir)

    logger.info('Completed job=%s, recipient=%s' % (req.identifier, req.recipient))
0
def peptide_rebuild_modeller(name, selection='all', hetatm=0, sequence=None,
        nmodels=1, hydro=0, quiet=1, *, _self=cmd):
    '''
DESCRIPTION

    Rebuild the atoms of a selection with modeller and load the result as a
    new object. Typical use: completing truncated sidechains. This thin
    wrapper is not meant for more elaborate modelling jobs.

    Side effects: Alters "type" property for MSE residues in selection
    (workaround for bug #3512313).

USAGE

    peptide_rebuild_modeller name [, selection [, hetatm [, sequence ]]]

ARGUMENTS

    name = string: new object name

    selection = string: atom selection

    hetatm = 0/1: read and model HETATMs (ligands) {default: 0}

    sequence = string: if provided, use this sequence instead of the
    template sequence {default: None}

    nmodels = int: number of models (states) to generate {default: 1}

    hydro = 0/1: use modeller's allhmodel class instead of automodel
    {default: 0}

    quiet = 0/1: suppress the progress messages {default: 1}
    '''
    import modeller
    from modeller.automodel import automodel, allhmodel

    import os
    import shutil
    import tempfile
    _assert_package_import()
    from .editing import update_identifiers

    nmodels, hetatm, quiet = int(nmodels), int(hetatm), int(quiet)

    # Model class: all-hydrogen variant on request, default otherwise.
    builder_cls = allhmodel if int(hydro) else automodel

    # All modeller input and output lives in a scratch directory which is
    # also made the working directory for the duration of the run.
    workdir = tempfile.mkdtemp()
    template_path = os.path.join(workdir, 'template.pdb')
    alignment_path = os.path.join(workdir, 'aln.pir')

    previous_dir = os.getcwd()
    os.chdir(workdir)

    if not quiet:
        print(' Notice: PWD=%s' % (workdir))

    try:
        modeller.log.none()
        env = modeller.environ()
        env.io.hetatm = hetatm

        # prevent PyMOL to put TER records before MSE residues (bug #3512313)
        _self.alter('(%s) and polymer' % (selection), 'type="ATOM"')

        # Export the selection and read it back as the template structure.
        _self.save(template_path, selection)
        template = modeller.model(env, file=template_path)

        aln = modeller.alignment(env)
        aln.append_model(template, align_codes='foo', atom_files=template_path)

        # Without an explicit sequence, fall back to the selection's own
        # sequence when the selection has atoms that are not present.
        has_missing = (not sequence
                and _self.count_atoms('(%s) & !present' % (selection)))
        if has_missing:
            sequence = get_seq(selection)

        if sequence:
            aln.append_sequence(sequence)
            aln[-1].code = 'bar'
            aln.malign()
        aln.write(alignment_path)

        # Every 'structure*' entry of the alignment is a template; the last
        # entry is the target.
        template_codes = [
            entry.code for entry in aln
            if entry.prottyp.startswith('structure')
        ]
        builder = builder_cls(env, alnfile=alignment_path,
                sequence=aln[-1].code, knowns=template_codes)
        builder.max_ca_ca_distance = 30.0

        if nmodels > 1:
            builder.ending_model = nmodels
            from multiprocessing import cpu_count
            ncpu = min(cpu_count(), nmodels)
            if ncpu > 1:
                # One local worker per usable CPU, at most one per model.
                from modeller import parallel
                workers = parallel.job(parallel.local_slave()
                        for _ in range(ncpu))
                builder.use_parallel_job(workers)

        builder.make()

        # Every generated model is loaded into the object `name`.
        for result in builder.outputs:
            _self.load(result['name'], name, quiet=quiet)
    finally:
        os.chdir(previous_dir)
        shutil.rmtree(workdir)

    _self.align(name, selection, cycles=0)
    if not sequence:
        update_identifiers(name, selection, _self=_self)

    if not quiet:
        print(' peptide_rebuild_modeller: done')
Beispiel #11
0
def run_modeller(structure, alignment, temp_dir: Union[str, Path, Callable]):
    """Build a single homology model with Modeller.

    The template structure and a PIR alignment are written into ``temp_dir``
    and Modeller is run with that directory as the working directory.

    Args:
        structure: Template structure, or a path (``str`` / ``Path``) from
            which it is loaded with ``PDB.load``.
        alignment: Two-entry alignment; entry 0 is the target, entry 1 the
            template.
        temp_dir: Directory for Modeller input and output, or a callable
            returning such a directory.

    Returns:
        The single entry of ``automodel.outputs``: a dictionary of model
        properties.  Of particular interest are the following keys:

            `name`: The name of the generated PDB structure.
            `Normalized DOPE score`: DOPE score that should be comparable between structures.
            `GA341 score`: GA341 score that should be comparable between structures.
    """
    import modeller
    from modeller.automodel import assess, automodel, autosched

    if isinstance(structure, (str, Path)):
        structure = PDB.load(structure)

    temp_dir = Path(temp_dir()) if callable(temp_dir) else Path(temp_dir)

    assert len(alignment) == 2
    target_id, template_id = alignment[0].id, alignment[1].id

    # Inputs for Modeller: template coordinates and the PIR alignment.
    PDB.save(structure, temp_dir / f"{template_id}.pdb")
    alignment_file = temp_dir / f"{template_id}-{target_id}.aln"
    structure_tools.write_pir_alignment(alignment, alignment_file)

    # Silence Modeller's own logging.
    modeller.log.none()

    env = modeller.environ()
    # Template structures are looked up in the scratch directory.
    env.io.atom_files_directory = [str(temp_dir)]
    env.schedule_scale = modeller.physical.values(default=1.0, soft_sphere=0.7)
    # Keep HETATM records and waters (including waters marked as HETATM)
    # from the template.
    env.io.hetatm = True
    env.io.water = True

    model = automodel(
        env,
        alnfile=str(alignment_file),
        knowns=str(template_id),
        sequence=str(target_id),
        # Assessment scores computed for the model.
        assess_methods=(assess.DOPE, assess.normalized_dope, assess.GA341),
    )
    # First and last model index are equal: exactly one model is built.
    model.starting_model = 1
    model.ending_model = 1

    # Very thorough VTFM optimization, no MD refinement.
    model.library_schedule = autosched.slow
    model.max_var_iterations = 300
    model.md_level = None

    # Stop if the objective function is higher than this value.
    model.max_molpdf = 2e6

    with py_tools.log_print_statements(logger), system_tools.switch_paths(temp_dir):
        model.make()

    assert len(model.outputs) == 1
    return model.outputs[0]
Beispiel #12
0
    def __run_modeller(self, alignFile, loopRefinement):
        """Run Modeller and collect the successfully built models.

        Parameters
        ----------
        alignFile : string
            File containing the input data
        loopRefinement : boolean
            If `True`, perform loop refinements

        Returns
        -------
        result : list
            ``(model filename, normalized DOPE score)`` for every model,
            regular or loop-refined, that was built without failure
        loop : boolean
            `True` if at least one loop-refined model was built successfully
        failures : list
            The ``failure`` entries of all models that could not be built
        """
        log.none()  # no Modeller log output
        env = environ()

        # Input structures are read from self.filePath (trailing '/' removed).
        env.io.atom_files_directory = [str(self.filePath.rstrip('/')), ]
        env.schedule_scale = physical.values(default=1.0, soft_sphere=0.7)

        # Keep HETATM records and waters (including waters marked as HETATM)
        # from the template.
        env.io.hetatm = True
        env.io.water = True

        logger.debug(
            'Performing loop refinement in addition to regular modelling: {}'
            .format(loopRefinement)
        )

        # Arguments shared by both model classes: alignment file, template
        # code, target code and the assessment scores to compute.
        shared_args = dict(
            alnfile=str(alignFile),
            knowns=str(self.templateID),
            sequence=str(self.seqID),
            assess_methods=(assess.DOPE, assess.normalized_dope),
        )
        if loopRefinement:
            a = dope_loopmodel(
                env,
                loop_assess_methods=(assess.DOPE, assess.normalized_dope),
                **shared_args
            )
            # First / last loop model index and loop refinement level.
            a.loop.starting_model = self.loopStart
            a.loop.ending_model = self.loopEnd
            a.loop.md_level = refine.slow
        else:
            a = automodel(env, **shared_args)

        # First / last regular model index.
        a.starting_model = self.start
        a.ending_model = self.end

        # Very thorough VTFM optimization, no MD refinement.
        a.library_schedule = autosched.slow
        a.max_var_iterations = 300
        a.md_level = None

        # Objective-function cutoff handed to Modeller.
        a.max_molpdf = 2e5

        with helper.log_print_statements(logger):
            a.make()  # the actual homology modelling

        result = []
        failures = []

        def _harvest(outputs):
            # Sort one Modeller output list into `result` / `failures` and
            # report how many of its models succeeded.
            succeeded = 0
            for entry in outputs:
                failure = entry['failure']
                if failure:
                    logger.debug('Failure! {}'.format(failure))
                    failures.append(failure)
                    continue
                model_filename = entry['name']
                model_dope_score = entry['Normalized DOPE score']
                logger.debug(
                    'Success! model_filename: {}, model_dope_score: {}'
                    .format(model_filename, model_dope_score))
                result.append((model_filename, model_dope_score))
                succeeded += 1
            return succeeded

        # Regular models first, then (if requested) the loop-refined ones.
        _harvest(a.outputs)

        loop = False
        if loopRefinement:
            logger.debug('Modeller loop outputs:')
            if _harvest(a.loop.outputs) > 0:
                loop = True

        # Successful models, whether any of them is loop refined, and the
        # recorded failures.
        return result, loop, failures
Beispiel #13
0
def peptide_rebuild_modeller(name, selection='all', hetatm=0, sequence=None,
        nmodels=1, hydro=0, quiet=1):
    '''
DESCRIPTION

    Remodel the given selection using modeller. This is useful for example to
    build incomplete sidechains. More complicated modelling tasks are not
    the intention of this simple interface.

    Side effects: Sets the "type" property to "ATOM" for all polymer atoms
    in selection (workaround for bug #3512313, which affects MSE residues).

USAGE

    peptide_rebuild_modeller name [, selection [, hetatm [, sequence
        [, nmodels [, hydro ]]]]]

ARGUMENTS

    name = string: new object name

    selection = string: atom selection

    hetatm = 0/1: read and model HETATMs (ligands) {default: 0}

    sequence = string: if provided, use this sequence instead of the
    template sequence {default: None}

    nmodels = int: number of models (states) to generate {default: 1}

    hydro = 0/1: use modeller's "allhmodel" class (all-hydrogen model)
    instead of "automodel" {default: 0}

    quiet = 0/1: if 0, print the temporary working directory and a
    completion message; also passed on to cmd.load {default: 1}

NOTES

    Models which modeller reports as failed are skipped with a warning.
    If no model could be built at all, an error is raised.
    '''
    try:
        import modeller
        from modeller.automodel import automodel, allhmodel
    except ImportError:
        print(' Error: failed to import "modeller"')
        raise CmdException

    import os
    import shutil
    import tempfile
    from .editing import update_identifiers

    nmodels, hetatm, quiet = int(nmodels), int(hetatm), int(quiet)

    model_class = allhmodel if int(hydro) else automodel

    cwd = os.getcwd()
    tempdir = tempfile.mkdtemp()
    pdbfile = os.path.join(tempdir, 'template.pdb')
    alnfile = os.path.join(tempdir, 'aln.pir')

    try:
        # modeller writes its output files into the current directory, so
        # work inside the temporary directory. Done inside the "try" so that
        # the directory is removed even if changing into it fails.
        os.chdir(tempdir)

        if not quiet:
            print(' Notice: PWD=%s' % (tempdir))

        modeller.log.none()
        env = modeller.environ()
        env.io.hetatm = hetatm

        # prevent PyMOL to put TER records before MSE residues (bug #3512313)
        cmd.alter('(%s) and polymer' % (selection), 'type="ATOM"')

        cmd.save(pdbfile, selection)
        mdl = modeller.model(env, file=pdbfile)

        # alignment: template structure ("foo") and, if a sequence was given,
        # the target sequence ("bar"). Without a sequence the template is
        # also the target.
        aln = modeller.alignment(env)
        aln.append_model(mdl, align_codes='foo', atom_files=pdbfile)
        if sequence:
            aln.append_sequence(sequence)
            aln[-1].code = 'bar'
            aln.malign()
        aln.write(alnfile)

        a = model_class(env, alnfile=alnfile, sequence=aln[-1].code,
                knowns=[s.code for s in aln if s.prottyp.startswith('structure')])
        a.max_ca_ca_distance = 30.0

        if nmodels > 1:
            a.ending_model = nmodels
            from multiprocessing import cpu_count
            ncpu = min(cpu_count(), nmodels)
            if ncpu > 1:
                from modeller import parallel
                job = parallel.job(parallel.local_slave()
                        for _ in range(ncpu))
                a.use_parallel_job(job)

        a.make()

        # Only load models which modeller built successfully; for a failed
        # model the output file may not exist.
        n_loaded = 0
        for output in a.outputs:
            failure = output.get('failure')
            if failure is not None:
                print(' Warning: modeller failed to build "%s": %s' %
                        (output['name'], failure))
                continue
            cmd.load(output['name'], name, quiet=quiet)
            n_loaded += 1

        if not n_loaded:
            print(' Error: modeller did not build any model')
            raise CmdException
    finally:
        os.chdir(cwd)
        shutil.rmtree(tempdir)

    cmd.align(name, selection, cycles=0)
    if not sequence:
        update_identifiers(name, selection)

    if not quiet:
        print(' peptide_rebuild_modeller: done')
Beispiel #14
0
    def calculate_modeller_score(self, res_path):
        """
            * This function constructs a single comparative model for the query
              sequence from the known template structure, using a PIR format
              alignment of query and template. The final model is moved next to
              the template's PDB files as "<pdb>_mod.atm" and parsed.
            * This function also returns the DOPE-HR assessed score of the model
              generated by MODELLER. It actually returns the opposite (multiplied by -1) since it
              is an energy score. This is to simplify the min/max normalization done afterwards.
              DOPE is the most reliable score at separating native-like models from decoys (lower,
              i.e, more negative, DOPE scores tend to correlate with more native-like models).
            * The working directory is temporarily changed to the MODELLER output
              directory and is always restored, even if an error occurs.

            Args:
                res_path (str): Path to the results folder.

            Returns:
                score(float): The DOPE-HR score (multiplied by -1) of the model generated by
                MODELLER, or 0 if MODELLER could not build the model (the failure is
                logged; in that case no model file is moved or parsed and
                ``self.template.modeller_pdb`` is not set).
        """
        root_dir = os.getcwd()
        modeller_out_dir = res_path + "/modeller/"
        ali_dir = "alignments/"
        pathlib.Path(modeller_out_dir + ali_dir).mkdir(parents=True,
                                                       exist_ok=True)
        path_to_atm = root_dir + "/data/pdb/" + self.template.name
        # Default score, returned when MODELLER fails to build the model
        modeller_dope_score = 0
        # MODELLER generates the result files in its current directory, so we must
        # go to the results directory and come back to root dir afterwards.
        os.chdir(modeller_out_dir)
        try:
            # We reindex all the PDB files to avoid any problem with modeller
            self.template.reindex_pdb(1, path_to_atm, True)
            # Parse the reindexed PDB to get its residues and their coordinates
            self.template.parse_pdb(path_to_atm + "/" +
                                    self.template.reindexed_pdb + ".atm")
            # Write Modeller's alignment PIR file
            self.write_alignment_for_modeller("./alignments/")
            # Silence everything Modeller prints to stdout
            with contextlib.redirect_stdout(None):
                # create a new MODELLER environment to build this model in
                m.env = m.environ()
                # directories for input atom files
                m.env.io.atom_files_directory = [path_to_atm]
                a_model = am.automodel(
                    m.env,
                    # alignment filename
                    alnfile=ali_dir + self.template.name + '.ali',
                    # codes of the templates
                    knowns=self.template.reindexed_pdb,
                    # code of the target
                    sequence='query_' + self.template.name,
                    # DOPE-HR is very similar to DOPE but is obtained at
                    # Higher Resolution (using a bin size of 0.125Å
                    # rather than 0.5Å).
                    assess_methods=assess.DOPEHR)
                a_model.very_fast()
                # index of the first and last model (determines how many models to calculate)
                a_model.starting_model = 1
                a_model.ending_model = 1
                # Catch any errors that Modeller can raise and write them in the log file
                try:
                    a_model.make()
                except m.ModellerError as err:
                    logging.warning("Modeller error with %s | %s: %s",
                                    self.template.name, self.template.pdb,
                                    err)

            # make() may have raised before producing any output, or the single
            # model may be flagged as failed; there is then no model file and
            # no score to read.
            outputs = getattr(a_model, "outputs", None) or []
            if not outputs or outputs[0].get("failure") is not None:
                logging.warning(
                    "Modeller built no model for %s | %s, using default score",
                    self.template.name, self.template.pdb)
                return modeller_dope_score * (-1)

            new_model_pdb = outputs[0]["name"]
            modeller_dope_score = outputs[0]["DOPE-HR score"]
            self.template.modeller_pdb = self.template.pdb + "_mod"
            # Move the new model to the PDB directory and rename it
            os.rename(new_model_pdb,
                      path_to_atm + "/" + self.template.modeller_pdb + ".atm")
            # Parse the new model generated by MODELLER to get the residues and their coordinates
            self.template.parse_pdb(path_to_atm + "/" +
                                    self.template.modeller_pdb + ".atm")
        finally:
            # Go back to root directory, whatever happened above
            os.chdir(root_dir)
        return modeller_dope_score * (-1)
Beispiel #15
0
    def __run_modeller(self, alignFile, loopRefinement):
        """Build homology models with Modeller and collect their scores.

        Parameters
        ----------
        alignFile : string
            Alignment file, passed to Modeller as ``alnfile``
        loopRefinement : boolean
            If `True`, use ``dope_loopmodel`` and perform loop refinement
            in addition to regular modelling

        Returns
        -------
        result : list
            ``(model_filename, normalized_dope_score)`` tuples of all
            successfully calculated models, regular models first,
            followed by loop models
        loop : boolean
            `True` if at least one loop model was successfully calculated
        failures : list
            The ``failure`` entries of all models that were not
            successfully calculated
        """
        log.none()  # instructs Modeller to display no log output.
        # create a new MODELLER environment to build this model in
        env = environ()

        # Directories for input atom files
        env.io.atom_files_directory = [
            str(self.filePath.rstrip("/")),
        ]
        env.schedule_scale = physical.values(default=1.0, soft_sphere=0.7)

        # Selected atoms do not feel the neighborhood
        # env.edat.nonbonded_sel_atoms = 2
        env.io.hetatm = True  # read in HETATM records from template PDBs
        env.io.water = True  # read in WATER records (including waters marked as HETATMs)

        logger.debug(
            "Performing loop refinement in addition to regular modelling: {}".
            format(loopRefinement))

        # Arguments shared by both model classes
        model_kwargs = dict(
            # alignment filename
            alnfile=str(alignFile),
            # code of the template
            knowns=str(self.templateID),
            # code of the target
            sequence=str(self.seqID),
            # which methods for validation should be calculated
            assess_methods=(assess.DOPE, assess.normalized_dope),
        )
        if loopRefinement:
            a = dope_loopmodel(
                env,
                loop_assess_methods=(assess.DOPE, assess.normalized_dope),
                **model_kwargs)
            # index of the first loop model
            a.loop.starting_model = self.loopStart
            # index of the last loop model
            a.loop.ending_model = self.loopEnd
            # loop refinement method
            a.loop.md_level = refine.slow
        else:
            a = automodel(env, **model_kwargs)

        a.starting_model = self.start  # index of the first model
        a.ending_model = self.end  # index of the last model

        # Very thorough VTFM optimization:
        a.library_schedule = autosched.slow
        a.max_var_iterations = 300

        # No MD optimization (thorough alternative: refine.slow)
        a.md_level = None

        # Optionally repeat the whole cycle 2 times:
        # a.repeat_optimization = 2

        a.max_molpdf = 2e5

        # with helper.print_heartbeats():  # use 'long_wait' in .travis.yml
        with helper.log_print_statements(logger):
            a.make()  # do the actual homology modeling

        result = []
        failures = []

        def collect(outputs):
            """Sort Modeller output records into `result` and `failures`.

            Returns `True` if at least one record is a successful model.
            """
            any_success = False
            for output in outputs:
                failure = output["failure"]
                if failure:
                    logger.debug("Failure! {}".format(failure))
                    failures.append(failure)
                    continue
                model_filename = output["name"]
                model_dope_score = output["Normalized DOPE score"]
                logger.debug(
                    "Success! model_filename: {}, model_dope_score: {}".format(
                        model_filename, model_dope_score))
                result.append((model_filename, model_dope_score))
                any_success = True
            return any_success

        # The output produced by modeller is stored in a.outputs ("normal"
        # models) and a.loop.outputs (loop models), one dictionary per model.
        collect(a.outputs)

        loop = False
        if loopRefinement:
            logger.debug("Modeller loop outputs:")
            loop = collect(a.loop.outputs)

        # Return the successfully calculated models, a loop flag indicating
        # whether loop refined models are among them, and the failures
        return result, loop, failures