Beispiel #1
0
def load_traj(filepath: PathLike, **kwargs) -> mdtraj.Trajectory:
    """Read a trajectory from disk, with a zero-volume fallback for PDB files.

    Files whose suffix is not ``.pdb`` (compared case-insensitively) are
    passed straight to ``mdtraj.load``. For PDB files any ``top`` keyword is
    discarded and ``mdtraj.load_pdb`` is used; if that raises
    ``FloatingPointError`` the load is retried with ``no_boxchk=True`` and a
    warning is logged.
    """
    path = pathlib.Path(filepath)
    path_str = str(path)

    if path.suffix.lower() != ".pdb":
        return mdtraj.load(path_str, **kwargs)

    # `load_pdb` cannot take a topology, so drop it if the caller passed one.
    kwargs.pop("top", None)

    try:
        loaded = mdtraj.load_pdb(path_str, **kwargs)
    except FloatingPointError:
        # The PDB loader checks density using the simulation box, which can
        # fail when the box volume is zero - retry with that check disabled.
        loaded = mdtraj.load_pdb(path_str, no_boxchk=True, **kwargs)
        logger.warning(
            "Unitcell has zero volume - periodic boundaries will not be accounted for. "
            "If the molecule is split by a periodic boundary, results will be incorrect."
        )

    return loaded
Beispiel #2
0
    def from_pdb(self, filename, selection=None, probe_radius=1.4, **kwargs):
        r"""Calculate solvent accessible surface area (SASA) from a PDB file

        If the PDB contains more than one structure, calculation is performed
        only for the first one.

        SASA units are Angstroms squared

        Parameters
        ----------
        filename: str
            Path to the PDB file
        selection: str
            Atomic selection for calculating SASA. All atoms are considered
            if the default None is passed. See the
            `selections page <https://www.mdanalysis.org/docs/documentation_pages/selections.html>`_
            for atom selection syntax.
        probe_radius: float
            The radius of the probe, in Angstroms
        kwargs: dict
            Optional arguments forwarded to :meth:`from_mdtraj`, intended for
            the underlying mdtraj.shrake_rupley algorithm

        Returns
        -------
        self: :class:`~idpflex.properties.SaSa`
            Instantiated SaSa property object
        """  # noqa: E501
        self.selection = selection
        trajectory = mdtraj.load_pdb(filename)
        if selection is None:
            return self.from_mdtraj(trajectory, probe_radius=probe_radius,
                                    **kwargs)
        # Resolve the selection to atomic indices on the full structure, then
        # reload the file restricted to those atoms.
        atom_indices = trajectory.top.select(selection)
        subset = mdtraj.load_pdb(filename, atom_indices=atom_indices)
        return self.from_mdtraj(subset, probe_radius=probe_radius, **kwargs)
Beispiel #3
0
def verify_transform(pdb1, pdb2, lenVector):
    """Check that two PDB files hold the same system and return their coordinates.

    Both files are read with mdtraj. The process is terminated with a
    non-zero exit status when the two structures differ in atom or residue
    count, or when ``3 * n_atoms`` does not equal ``lenVector``.

    Args:
        pdb1: first file, in PDB format
        pdb2: second file, in PDB format
        lenVector: expected number of coordinates (three per atom)

    Returns:
        Tuple ``(xyz1, xyz2)`` of flat lists holding the first-frame
        coordinates of each structure multiplied by 10 (nm to Angstrom).
    """
    pdb_1 = md.load_pdb(pdb1)  # PDB parsing is delegated to mdtraj
    pdb_2 = md.load_pdb(pdb2)
    if pdb_1.n_atoms != pdb_2.n_atoms or pdb_1.n_residues != pdb_2.n_residues:
        print(
            "Pas le même nombre d'atomes/residues entre les deux structures !!"
        )
        print("Nombre d'atomes : {} vs {}".format(pdb_1.n_atoms,
                                                  pdb_2.n_atoms))
        print("Nombre de résidues : {} vs {}".format(pdb_1.n_residues,
                                                     pdb_2.n_residues))
        # Exit with a failure status so calling scripts can detect the error
        # (a bare sys.exit() reports success).
        sys.exit(1)
    if pdb_1.n_atoms * 3 != lenVector:
        print(
            "Nombre de coordonnées du système: {} vs nombre de coordonnées normales{}"
            .format(pdb_1.n_atoms * 3, lenVector))
        print(
            "Pas le même nombre de coordonnées normales et de coordonnées atomiques !!"
        )
        sys.exit(1)

    # Flatten the first frame to [x0, y0, z0, x1, ...] and convert nm -> Angstrom.
    xyz1 = [coord * 10 for atom in pdb_1[0].xyz[0] for coord in atom]
    xyz2 = [coord * 10 for atom in pdb_2[0].xyz[0] for coord in atom]

    return (xyz1, xyz2)
Beispiel #4
0
def test_pdb_from_url():
    """4K6Q loads from the RCSB URL both gzipped and uncompressed."""
    urls = ('http://www.rcsb.org/pdb/files/4K6Q.pdb.gz',
            'http://www.rcsb.org/pdb/files/4K6Q.pdb')
    # Fetch both variants up front, then run the checks in the same order.
    loaded = [load_pdb(url) for url in urls]
    for traj in loaded:
        eq(traj.n_frames, 1)
    for traj in loaded:
        eq(traj.n_atoms, 2208)
Beispiel #5
0
def test_pdb_from_url():
    """Load 4K6Q from its URL (gzipped and plain) and check frame/atom counts."""
    base_url = 'http://www.rcsb.org/pdb/files/4K6Q.pdb'
    gzipped = load_pdb(base_url + '.gz')
    plain = load_pdb(base_url)
    for attribute, expected in (('n_frames', 1), ('n_atoms', 2208)):
        eq(getattr(gzipped, attribute), expected)
        eq(getattr(plain, attribute), expected)
Beispiel #6
0
def test_load_pdb_input_top(get_fn):
    """Passing an already-parsed topology to load_pdb gives the same trajectory."""
    path = get_fn('native.pdb')
    reference = load_pdb(path)
    reloaded = load_pdb(path, top=reference.topology)

    eq(reference.xyz, reloaded.xyz)
    eq(reference.topology, reloaded.topology)
Beispiel #7
0
    def __init__(self, FG_fname, CG_fname):
        """Load both PDB files, centre their coordinates and run the set-up steps.

        Stores the two file names, loads each with mdtraj (calling
        ``center_coordinates()`` on the result) and then calls ``_parse``,
        ``_create_mapping`` and ``_align`` in that order.
        """
        self.FG_fname, self.CG_fname = FG_fname, CG_fname

        fg_loaded = md.load_pdb(filename=self.FG_fname)
        self.FG_trj = fg_loaded.center_coordinates()
        cg_loaded = md.load_pdb(filename=self.CG_fname)
        self.CG_trj = cg_loaded.center_coordinates()

        self._parse()
        self._create_mapping()
        self._align()
def loader(file):
    """Try to load ``file`` as a PDB with mdtraj and print its name on success.

    Files that fail to load are skipped silently. Always returns None.
    """
    try:
        md.load_pdb(file)
    except Exception:
        # Best-effort probe: any loading error means "skip this file".
        # `Exception` (not a bare except) lets Ctrl-C and SystemExit through.
        return None

    print(file)

    return None
Beispiel #9
0
def loader(file):
    """Print ``file`` if mdtraj can load it as a PDB; stay silent otherwise.

    Always returns None.
    """
    try:
        md.load_pdb(file)
    except Exception:
        # Deliberately best-effort: a file that cannot be loaded is ignored.
        # Catching `Exception` keeps KeyboardInterrupt/SystemExit propagating.
        return None

    print(file)

    return None
Beispiel #10
0
    def __init__(self, CG_pdb_f_name, AA_pdb_f_name):
        """Load the two PDB files without solvent and parse their beads.

        Each file is loaded with mdtraj and passed through
        ``remove_solvent()``; the trajectory and its topology are stored.
        The same file names are then handed to ``parse_CG_pdb`` /
        ``parse_AA_pdb``.
        """
        self.CG_pdb_f_name = CG_pdb_f_name
        self.AA_pdb_f_name = AA_pdb_f_name

        cg_loaded = md.load_pdb(filename=self.CG_pdb_f_name)
        self.CG_trj = cg_loaded.remove_solvent()
        self.CG_top = self.CG_trj.top

        aa_loaded = md.load_pdb(filename=self.AA_pdb_f_name)
        self.AA_trj = aa_loaded.remove_solvent()
        self.AA_top = self.AA_trj.top

        self.CG_beads = parse_CG_pdb(self.CG_pdb_f_name)
        self.AA_beads = parse_AA_pdb(self.AA_pdb_f_name)
Beispiel #11
0
    def _construct_traj(self):
        """Build ``self.traj`` by accumulating every model listed in ``self.df``.

        The model at ``model_filepath[0]`` seeds the trajectory; each
        remaining model is loaded and added with ``+=``. Disulfide bonds are
        removed from every topology via
        ``remove_disulfide_bonds_from_topology`` before the model is used.
        """
        filepaths = self.df.model_filepath
        log_template = 'Loading Trajectory object for model {0} ({1}/{2})'

        logger.debug(log_template.format(self.df.templateid.iloc[0], 0, len(filepaths)))
        first = mdtraj.load_pdb(filepaths[0])
        remove_disulfide_bonds_from_topology(first.topology)
        self.traj = first

        # Positions start at 1 because the first model was consumed above.
        for position, path in enumerate(filepaths[1:], start=1):
            logger.debug(log_template.format(self.df.templateid.iloc[position], position, len(filepaths)))
            following = mdtraj.load_pdb(path)
            remove_disulfide_bonds_from_topology(following.topology)
            self.traj += following
Beispiel #12
0
	def makeDatabase(self, q_values,
		num_iter, lmax,
		save_path,
		beta = 0.0,
		alpha = 1.0,
		zeta = 0.0):
		"""Run phase retrieval on the guess/full model pair and save the result.

		Stores the parameters on the instance, loads the two PDB files named by
		``self.traj_full_path`` and ``self.traj_guess_path``, checks that both
		models share the same q values, then calls ``_retrieve_phases``,
		``_compare_full_guess``, computes the RMSD between the two
		trajectories and finally ``_saveDB(save_path)``.

		Parameters
		----------
		q_values : the string 'all' to keep each model's q values untouched,
			otherwise the q values both models are sliced to (in place).
		num_iter, lmax : stored on the instance as ``num_iter`` / ``lmax``.
		save_path : forwarded to ``_saveDB``.
		beta, alpha, zeta : stored on the instance.

		Returns
		-------
		None. When the q values of the two models do not match, a message is
		printed and the method returns before anything is computed or saved.
		"""
		self.alpha = alpha
		self.beta = beta
		self.zeta = zeta

		self.q_values = q_values
		self.lmax = lmax
		self.num_iter = num_iter

		traj_full = mdtraj.load_pdb( self.traj_full_path )
		traj_guess = mdtraj.load_pdb( self.traj_guess_path )

		# The isinstance guard comes first so that an array-like q_values is
		# never compared against the string 'all'.
		if isinstance(q_values, str) and q_values == 'all':
			# Explicit check instead of `assert`: asserts vanish under `python -O`.
			if not np.isclose( self.model_guess.q_values,
				self.model_full.q_values ).all():
				print( "q_values of the model_full and model_guess do not match. "
					"Cannot use all q_values." )
				return
			# Nothing to slice: adopt the models' own q values.
			self.q_values = self.model_full.q_values
		else:
			self.model_full.slice_by_qvalues( q_values=self.q_values, inplace=True )
			self.model_guess.slice_by_qvalues( q_values=self.q_values, inplace=True )
			if not np.isclose( self.model_guess.q_values,
				self.model_full.q_values ).all():
				print( "q_values of the model_full and model_guess do not match. "
					"Failed!!!" )
				return

		# retrieve the phases
		self._retrieve_phases()

		# compare guess to full model
		self._compare_full_guess()

		# compute RMSD; a ValueError from mdtraj.rmsd is recorded as 0
		try:
			self.rmsd = mdtraj.rmsd( traj_full, traj_guess, frame = 0)
		except ValueError:
			self.rmsd = 0

		# save
		self._saveDB( save_path )
Beispiel #13
0
def test_segment_id(get_fn):
    """Segment ids read from ala_ala_ala.pdb are 'AAL' and survive a save/load round trip."""
    original = load_pdb(get_fn('ala_ala_ala.pdb'))
    original.save_pdb(temp)
    roundtrip = load_pdb(temp)

    expected = 'AAL'
    # every residue of the source file must carry the expected segment id
    for index, residue in enumerate(original.top.residues):
        assert residue.segment_id == expected, "residue %i (0-indexed) does not have segment_id set correctly from ala_ala_ala.pdb"%(index)

    # the ids must be unchanged after writing and re-reading the structure
    residue_pairs = zip(original.top.residues, roundtrip.top.residues)
    for index, (before, after) in enumerate(residue_pairs):
        assert before.segment_id == after.segment_id, "segment_id of residue %i (0-indexed) in ala_ala_ala.pdb does not agree with value in after being written out to a new pdb file"%(index)
Beispiel #14
0
def test_1vii_url_and_gz():
    """1vii loads identically from URL or local file, gzipped or not."""
    remote_gz = load_pdb('http://www.rcsb.org/pdb/files/1vii.pdb.gz')
    remote_plain = load_pdb('http://www.rcsb.org/pdb/files/1vii.pdb')
    local_gz = load_pdb(get_fn('1vii.pdb.gz'))
    local_plain = load_pdb(get_fn('1vii.pdb'))

    eq(remote_gz.n_frames, 1)
    others = (remote_plain, local_gz, local_plain)
    for other in others:
        eq(remote_gz.n_frames, other.n_frames)

    for other in others:
        eq(remote_gz.n_atoms, other.n_atoms)
Beispiel #15
0
def test_segment_id(get_fn):
    """Check the 'AAL' segment id on load and after writing a new PDB file."""
    source = load_pdb(get_fn('ala_ala_ala.pdb'))
    source.save_pdb(temp)
    rewritten = load_pdb(temp)

    correct_segment_id = 'AAL'

    # segment ids as parsed from the shipped file
    for position, res in enumerate(source.top.residues):
        assert res.segment_id == correct_segment_id, "residue %i (0-indexed) does not have segment_id set correctly from ala_ala_ala.pdb"%(position)

    # segment ids after the write/read round trip
    for position, (res_in, res_out) in enumerate(zip(source.top.residues, rewritten.top.residues)):
        assert res_in.segment_id == res_out.segment_id, "segment_id of residue %i (0-indexed) in ala_ala_ala.pdb does not agree with value in after being written out to a new pdb file"%(position)
Beispiel #16
0
def test_1vii_url_and_gz():
    """Compare frame and atom counts of 1vii across the four loading routes."""
    reference = load_pdb('http://www.rcsb.org/pdb/files/1vii.pdb.gz')
    alternatives = [
        load_pdb('http://www.rcsb.org/pdb/files/1vii.pdb'),
        load_pdb(get_fn('1vii.pdb.gz')),
        load_pdb(get_fn('1vii.pdb')),
    ]

    eq(reference.n_frames, 1)
    for candidate in alternatives:
        eq(reference.n_frames, candidate.n_frames)

    for candidate in alternatives:
        eq(reference.n_atoms, candidate.n_atoms)
    def _get_models(self):
        """Populate ``self.model`` with one implicit and one explicit refined model.

        Walks the immediate sub-directories of ``self.models_target_dir`` and,
        for each kind still missing, loads the first
        ``<kind>-refined.pdb.gz`` file that exists. Stops as soon as both
        kinds have been found.
        """
        refined_filenames = {
            'implicit': 'implicit-refined.pdb.gz',
            'explicit': 'explicit-refined.pdb.gz',
        }
        kinds = ('implicit', 'explicit')

        self.model = {}
        # Only the first os.walk entry is needed: the top-level directory names.
        _, dirnames, _ = next(os.walk(self.models_target_dir))
        for dirname in dirnames:
            if all(kind in self.model for kind in kinds):
                break
            for kind in kinds:
                if kind in self.model:
                    continue
                candidate = os.path.join(self.models_target_dir, dirname, refined_filenames[kind])
                if os.path.exists(candidate):
                    self.model[kind] = mdtraj.load_pdb(candidate)
Beispiel #18
0
def func(arg):
    """Compute RMSDs of one XTC trajectory against several reference PDBs.

    ``arg`` is a 5-tuple ``(xtcf, topf, reffs, p, sel)``: trajectory file,
    topology PDB, iterable of reference PDBs, a value repeated once per
    frame as the last column, and an mdtraj selection string applied to
    both the trajectory and every reference.

    Returns the transposed array: one row per frame, one column per
    reference followed by the ``p`` column.
    """
    xtcf, topf, reffs, p, sel = arg
    selected_atoms = md.load_pdb(topf).top.select(sel)
    frames = md.load_xtc(xtcf, topf).atom_slice(selected_atoms)

    columns = []
    for reff in reffs:
        reference = md.load_pdb(reff)
        reference_subset = reference.atom_slice(reference.top.select(sel))
        columns.append(md.rmsd(frames, reference_subset))
    columns.append([p] * len(frames))
    return np.array(columns).T
Beispiel #19
0
    def get(self, pdbid):
        """Download one PDB entry and return its path or the loaded structure.

        The gzipped entry is retrieved through ``self.conn.retrbinary`` into
        ``<temp_dir>/<pdbid>.pdb.gz``. When ``self.just_files`` is true that
        path is returned; otherwise the archive is decompressed next to it
        (same name without ``.gz``) and loaded with mdtraj.

        Parameters
        ----------
        pdbid : str
            Four-character PDB identifier (case-insensitive).

        Returns
        -------
        str or mdtraj.Trajectory

        Raises
        ------
        ValueError
            If ``pdbid`` is not four characters long.
        """
        pdbid = pdbid.lower()

        if len(pdbid) != 4:
            raise ValueError("pdb id must be four characters long")

        filename = os.path.join(self.temp_dir, '%s.pdb.gz' % pdbid)
        with open(filename, 'wb') as filehandler:
            self.conn.retrbinary(
                'RETR pub/pdb/data/structures/divided/pdb/%s/pdb%s.ent.gz' %
                (pdbid[1:3], pdbid), filehandler.write)

        if self.just_files:
            return filename

        # gzip.open yields bytes, so the decompressed copy must be written in
        # binary mode as well (text mode raises TypeError on Python 3).
        unzipped_filename = filename[:-3]
        with gzip.open(filename, 'rb') as source, \
                open(unzipped_filename, 'wb') as target:
            target.write(source.read())

        return mdtraj.load_pdb(unzipped_filename)
Beispiel #20
0
def clean_pdb(name, route=None, chain_num=None):
    """ Rewrite a structure keeping only the requested chain(s).
        Inputs:
        * name: str. route of the input .pdb file
        * route: str. route of the output. will overwrite input if not provided
        * chain_num: int. chain to keep; it is compared against mdtraj's
                     `chain.index`. All chains are kept when None.
                     NOTE(review): the previous docstring described this as
                     1-indexed "as pdb files" - confirm which is intended.
        Output: route of destin file.
    """
    destin = name if route is None else route
    structure = mdtraj.load_pdb(name)
    topology = structure.topology

    # chains that pass the optional filter
    wanted_chains = [
        chain for chain in topology.chains
        if chain_num is None or chain_num == chain.index
    ]

    # gather the atom indexes of every kept chain
    atom_idxs = []
    for chain in wanted_chains:
        selected = topology.select(f"chainid == {str(chain.index)}")
        atom_idxs.extend(selected.tolist())
    # topology and xyz selection are ordered, so keep the indexes sorted
    atom_idxs.sort()

    # build the reduced trajectory and write it out
    subset = mdtraj.Trajectory(xyz=structure.xyz[:, atom_idxs],
                               topology=topology.subset(atom_idxs))
    subset.save(destin)
    return destin
Beispiel #21
0
def _load_traj(ag):
    """Convert ``ag`` to an mdtraj trajectory through a temporary PDB file.

    ``ag`` is written with ``prody.writePDB`` to a temporary ``.pdb`` file in
    the current directory, which is then read back with
    ``md.load_pdb(..., frame=0)``. The temporary file is removed even when
    writing or loading fails.
    """
    handle, tmp = tempfile.mkstemp(dir='.', suffix='.pdb')
    # Only the path is needed; prody opens the file itself.
    os.close(handle)
    try:
        prody.writePDB(tmp, ag)
        return md.load_pdb(tmp, frame=0)
    finally:
        os.remove(tmp)
Beispiel #22
0
def test_4():
    """Simplified and full DSSP outputs have matching shapes; simplified codes are C/E/H."""
    traj = md.load_pdb(get_fn('1am7_protein.pdb'))
    simplified = md.compute_dssp(traj, simplified=True)
    detailed = md.compute_dssp(traj, simplified=False)
    assert len(simplified) == len(detailed)
    assert len(simplified[0]) == len(detailed[0])
    codes_in_first_frame = list(np.unique(simplified[0]))
    assert codes_in_first_frame == ['C', 'E', 'H']
Beispiel #23
0
def custom2pdb(coords, proteinnet_id, route):
    """ Takes a custom representation and turns into a .pdb file.
        Inputs:
        * coords: array/tensor of shape (3 x N) or (N x 3). in Angstroms.
                  same order as in the proteinnnet is assumed (same as raw pdb file)
        * proteinnet_id: str. proteinnet id format (<class>#<pdb_id>_<chain_number>_<chain_id>)
                         see: https://github.com/aqlaboratory/proteinnet/
        * route: str. destin route.
        Output: tuple of routes: (original, generated) for the structures.
    """
    # convert to numpy
    if isinstance(coords, torch.Tensor):
        coords = coords.detach().cpu().numpy()
    # ensure (1, N, 3): only a (3, N) input needs transposing. A 3x3 input is
    # ambiguous and is taken to be (N, 3) already.
    if coords.shape[0] == 3 and coords.shape[1] != 3:
        coords = coords.T
    # np.newaxis is just None and cannot be called; expand_dims adds the
    # leading frame axis.
    coords = np.expand_dims(coords, axis=0)
    # get pdb id and chain num
    pdb_name, chain_num = proteinnet_id.split("#")[-1].split("_")[:-1]
    # put the downloaded file next to `route`; a route without any directory
    # part resolves to the current directory instead of the filesystem root
    directory, separator, _ = route.rpartition("/")
    pdb_destin = directory + separator + pdb_name + ".pdb"
    # download pdb file and select appropiate chain
    download_pdb(pdb_name, pdb_destin)
    # clean_pdb compares against an integer chain index, so the string parsed
    # from the id must be converted.
    # NOTE(review): confirm whether the proteinnet chain number needs an
    # offset to match mdtraj's chain.index.
    clean_pdb(pdb_destin, chain_num=int(chain_num))
    # load trajectory scaffold and replace coordinates - assumes same order
    # NOTE(review): coords are documented as Angstroms and assigned to
    # `xyz` unchanged - confirm the unit expected by mdtraj here.
    scaffold = mdtraj.load_pdb(pdb_destin)
    scaffold.xyz = coords
    scaffold.save(route)
    return pdb_destin, route
Beispiel #24
0
def main(args):
    """Print a per-frame distance histogram for PDB entry 2EQQ.

    The structure is fetched from RCSB and reduced with the selection below.
    For every frame two grids are generated with ``genAV`` (one per
    attachment atom); frames where either grid has no cell above 1.0 are
    skipped. Otherwise 1,000,000 distances are sampled with
    ``ll.sampleDistanceDistInv`` and their counts over 40 bins spanning
    0-80 are printed as one tab-separated line. ``args`` is not used.
    """
    structure = md.load_pdb('http://www.rcsb.org/pdb/files/2EQQ.pdb')

    # strip sidechains of residues 2 and 27
    kept_atoms = structure.topology.select(
        'name CB CA N C or not (resSeq 2 or resSeq 27)')
    structure = structure.atom_slice(kept_atoms)
    topology = structure.topology

    edges = np.linspace(0.0, 80.0, 41)
    centers = edges[:-1] + (edges[1] - edges[0]) / 2.0

    # indices of the donor and acceptor attachment atoms
    donor_atom = topology.select('name CB and resSeq 2')[0]
    acceptor_atom = topology.select('name CB and resSeq 27')[0]

    # header line
    header_cells = ['{:.1f}'.format(center) for center in centers]
    print('Frame#\t' + 'A\t'.join(header_cells))

    for frame in range(structure.n_frames):
        donor_grid = genAV(structure, frame, donor_atom, 20.0, 2.0, 3.5, 0.9,
                           0.3, 3.5 + 3.0)
        acceptor_grid = genAV(structure, frame, acceptor_atom, 22.0, 2.0, 3.5,
                              0.9, 0.4, 3.5 + 3.0)

        donor_size = np.count_nonzero(np.array(donor_grid.grid) > 1.0)
        acceptor_size = np.count_nonzero(np.array(acceptor_grid.grid) > 1.0)
        if 0 in (donor_size, acceptor_size):
            continue

        sampled = ll.sampleDistanceDistInv(donor_grid, acceptor_grid, 1000000)
        counts, _ = np.histogram(sampled, bins=edges)
        row = '\t'.join(str(count) for count in counts)
        print('{}\t'.format(frame) + row)
Beispiel #25
0
    def _construct_traj(self):
        """Load every model in ``self.df`` and accumulate them into ``self.traj``.

        Each model is read with mdtraj and has its disulfide bonds removed
        from the topology. The first one becomes ``self.traj``; the others
        are appended with ``+=``.
        """
        n_models = len(self.df.model_filepath)
        message = 'Loading Trajectory object for model {0} ({1}/{2})'

        logger.debug(message.format(self.df.templateid.iloc[0], 0, n_models))
        seed = mdtraj.load_pdb(self.df.model_filepath[0])
        remove_disulfide_bonds_from_topology(seed.topology)
        self.traj = seed

        remaining = self.df.model_filepath[1:]
        for offset, model_filepath in enumerate(remaining):
            number = offset + 1  # the first model was handled above
            logger.debug(
                message.format(self.df.templateid.iloc[number], number,
                               len(self.df.model_filepath)))
            extra = mdtraj.load_pdb(model_filepath)
            remove_disulfide_bonds_from_topology(extra.topology)
            self.traj += extra
Beispiel #26
0
def load_pdb(fn):
    """Load ``fn``, keep only atoms matching "protein", and return the tail frames.

    The returned trajectory starts at frame ``int(n_frames * 0.8)``.
    """
    everything = mt.load_pdb(fn)
    protein_atoms = everything.topology.select("protein")
    protein_only = everything.atom_slice(protein_atoms)

    first_kept = int(protein_only.n_frames * 0.8)
    return protein_only[first_kept:]
Beispiel #27
0
def metal_searcher(file):
    """Count the metals from ``metals_list`` present in one PDB file.

    An atom is counted when its name equals its residue's name and that
    name is listed in the module-level ``metals_list``.

    Returns a dict with keys:
      * 'ok_file_count' / 'error_file_count': 1/0 flags telling whether the
        file could be loaded by mdtraj,
      * 'metals_in_file': mapping of every name in ``metals_list`` to the
        number of matching atoms,
      * 'any_metal_in_file': 1 if at least one atom matched, else 0.
    When loading fails, the dict is returned with only the error flag set.
    """
    metal_searcher_results = {
        'ok_file_count': 0,
        'error_file_count': 0,
        'metals_in_file': {metal: 0 for metal in metals_list},
        'any_metal_in_file': 0,
    }

    print(file)

    try:
        traj = md.load_pdb(file)
    except Exception:
        # Any loading problem marks the file as failed. `Exception` rather
        # than a bare except so Ctrl-C / SystemExit are not swallowed.
        metal_searcher_results['error_file_count'] = 1
        return metal_searcher_results
    metal_searcher_results['ok_file_count'] = 1

    one_atom_residue_atoms = [
        atom.name for atom in traj.topology.atoms
        if atom.name == atom.residue.name and atom.name in metals_list
    ]

    if one_atom_residue_atoms:
        metal_searcher_results['any_metal_in_file'] = 1
        for atom_name in one_atom_residue_atoms:
            metal_searcher_results['metals_in_file'][atom_name] += 1

    print(metal_searcher_results)
    return metal_searcher_results
Beispiel #28
0
    def get(self, pdbid):
        """Fetch a PDB entry; return the archive path or the parsed structure.

        The gzipped entry is downloaded with ``self.conn.retrbinary`` to
        ``<temp_dir>/<pdbid>.pdb.gz``. If ``self.just_files`` is true the
        path of that archive is returned. Otherwise the archive is
        decompressed to the same name without ``.gz`` and the result of
        ``mdtraj.load_pdb`` on that file is returned.

        Raises ``ValueError`` when ``pdbid`` is not four characters long.
        """
        pdbid = pdbid.lower()

        if len(pdbid) != 4:
            raise ValueError("pdb id must be four characters long")

        filename = os.path.join(self.temp_dir, '%s.pdb.gz' % pdbid)
        with open(filename, 'wb') as filehandler:
            self.conn.retrbinary('RETR pub/pdb/data/structures/divided/pdb/%s/pdb%s.ent.gz' % (pdbid[1:3], pdbid),
                filehandler.write)

        if self.just_files:
            return filename

        # The decompressed payload is bytes; writing it through a text-mode
        # handle fails on Python 3, so both ends are opened in binary mode.
        plain_filename = filename[:-3]
        with gzip.open(filename, 'rb') as compressed:
            payload = compressed.read()
        with open(plain_filename, 'wb') as filehandler:
            filehandler.write(payload)

        return mdtraj.load_pdb(plain_filename)
Beispiel #29
0
def test_3(tmpdir):
    """Compare ``call_dssp`` with ``md.compute_dssp`` on structures fetched from RCSB."""
    # 1COY gives a small error, due to a broken chain.
    for pdbid in ('1GAI', '6gsv', '2AAC'):
        url = 'http://www.rcsb.org/pdb/files/%s.pdb' % pdbid
        full = md.load_pdb(url)
        minimal = full.atom_slice(full.top.select_atom_indices('minimal'))
        from_helper = call_dssp(tmpdir, minimal)
        from_mdtraj = md.compute_dssp(minimal, simplified=False)[0]
        assert_(from_helper, from_mdtraj)
    def _execute(self, directory, available_resources):
        """Extract the receptor from the complex and write its prepared structure.

        The complex PDB is loaded with mdtraj, the atoms whose role is
        ``Component.Role.Receptor`` are saved to ``host_input.pdb`` inside
        ``directory``, that file is passed to
        ``Setup.prepare_host_structure`` and the resulting topology and
        positions are written to ``output.pdb`` (stored in
        ``self.output_coordinate_path``). ``available_resources`` is unused.
        """
        import mdtraj
        from paprika.evaluator import Setup
        from simtk.openmm import app

        complex_trajectory = mdtraj.load_pdb(self.complex_file_path)

        indices_by_role = _atom_indices_by_role(
            self.substance, self.complex_file_path
        )
        receptor_indices = indices_by_role[Component.Role.Receptor]
        receptor_trajectory = complex_trajectory.atom_slice(receptor_indices)

        host_file_path = os.path.join(directory, "host_input.pdb")
        receptor_trajectory.save(host_file_path)

        prepared_host = Setup.prepare_host_structure(host_file_path)

        self.output_coordinate_path = os.path.join(directory, "output.pdb")

        with open(self.output_coordinate_path, "w") as file:
            app.PDBFile.writeFile(
                prepared_host.topology, prepared_host.positions, file, True
            )
Beispiel #31
0
def test_1():
    """Yield one DSSP comparison check per bundled PDB file (generator-style test).

    Each yielded callable compares ``call_dssp`` with ``md.compute_dssp``
    for one structure reduced to its 'minimal' atom selection.
    """
    for fn in ['1bpi.pdb', '1vii.pdb', '4K6Q.pdb', '1am7_protein.pdb']:
        t = md.load_pdb(get_fn(fn))
        t = t.atom_slice(t.top.select_atom_indices('minimal'))

        # Bind the current trajectory as a default argument: a plain closure
        # would look `t` up when called, so a runner that exhausts the
        # generator before running the checks would test only the last file.
        def f(t=t):
            assert_(call_dssp(t), md.compute_dssp(t, simplified=False)[0])

        f.description = 'test_1: %s' % fn
        yield f
Beispiel #32
0
def res_seq(pdb):
    """Return the residue sequence of a PDB file mapped through ``long2short``.

    Every residue is converted with ``str()``; labels shorter than three
    characters are dropped and the first three characters of the others are
    passed to ``long2short``.
    """
    structure = mt.load_pdb(pdb)
    labels = [str(residue) for residue in structure.topology.residues]
    three_letter_codes = [label[:3] for label in labels if len(label) >= 3]

    return [long2short(code) for code in three_letter_codes]
Beispiel #33
0
def test_4():
    """DSSP with and without simplification agree in shape on 1am7_protein.pdb."""
    structure = md.load_pdb(get_fn('1am7_protein.pdb'))
    results = {
        flag: md.compute_dssp(structure, simplified=flag)
        for flag in (True, False)
    }
    coarse, fine = results[True], results[False]
    assert len(coarse) == len(fine)
    assert len(coarse[0]) == len(fine[0])
    assert list(np.unique(coarse[0])) == ['C', 'E', 'H']
Beispiel #34
0
def main():
    """Concatenate XTC trajectories into one superposed DCD plus a matching PDB.

    Arguments come from ``parse_args()`` as ``(inp, topf, sel, outn, dt)``.
    When ``inp`` is a directory every entry in it is treated as a trajectory
    file and ``topf`` is mandatory; otherwise ``inp`` is read as a pickled
    table providing ``top_fn`` and ``traj_fn`` columns. Each trajectory is
    loaded restricted to the atoms selected by ``sel`` with stride ``dt``,
    joined, superposed onto the selected topology structure and written to
    ``<outn>.dcd``; the selected topology is written to ``<outn>.pdb``.

    Exits with a non-zero status when the topology file is missing or no
    trajectory file was found.
    """
    import sys

    inp, topf, sel, outn, dt = parse_args()
    if os.path.isdir(inp):
        if topf is None:
            # sys.exit with a message prints to stderr and returns status 1.
            sys.exit("Error: top file is required!")
        xtcfs = sorted(os.path.join(inp, name) for name in os.listdir(inp))
    else:
        meta = pd.read_pickle(inp)
        topf = meta["top_fn"].values[0]
        xtcfs = meta["traj_fn"].values
    top = md.load_pdb(topf)
    ndx = top.top.select(sel)
    top = top.atom_slice(ndx)

    xtcall = None
    for xtcf in xtcfs:
        xtc = md.load_xtc(xtcf, top=topf, atom_indices=ndx, stride=dt)
        if xtcall is None:
            xtcall = xtc
        else:
            # Share one topology object so that join() accepts the pieces.
            xtc.topology = xtcall.topology
            xtcall = xtcall.join(xtc)

    if xtcall is None:
        sys.exit("Error: no trajectory files found!")

    outtop = "%s.pdb" % outn
    outdcd = "%s.dcd" % outn
    top.save_pdb(outtop)
    xtcall.superpose(top)
    xtcall.save_dcd(outdcd)
Beispiel #35
0
def make_perturbed_traj(traj_path, save_path, f_perturb):
    """Save a copy of a PDB structure with uniform random shifts added.

    Every coordinate is shifted by ``u * f_perturb * s`` where ``u`` is drawn
    uniformly from [-1, 1) and ``s`` is the mean, over the atoms of the first
    frame, of the squared distance to that frame's centroid. The same shifts
    are added to every frame.
    """
    traj = mdtraj.load_pdb(traj_path)
    first_frame = traj.xyz[0]
    centered = first_frame - first_frame.mean(0)
    mean_dist = np.mean(np.sum(centered**2, axis=1), axis=0)
    # one random number per (atom, dimension), rescaled from [0, 1) to [-1, 1)
    n_atoms, n_dims = traj.xyz.shape[1], traj.xyz.shape[2]
    unit_noise = np.random.rand(n_atoms, n_dims) * 2 - 1
    traj.xyz += unit_noise * f_perturb * mean_dist
    traj.save(save_path)
Beispiel #36
0
def test_refine_explicit_md_short():
    """Run a two-femtosecond explicit refinement and check its output files.

    Verifies that the refined model, the energies file and the log exist,
    that the log reports ``finished`` and ``successful`` as True, and that
    the refined model can be loaded by mdtraj.
    """
    with integrationtest_context(set_up_project_stage='solvated'):
        targetid = 'EGFR_HUMAN_D0'
        templateid = 'KC1D_HUMAN_D0_4KB8_D'
        refine_explicit_md(process_only_these_targets=[targetid],
                           process_only_these_templates=[templateid],
                           sim_length=2.0 * unit.femtosecond,
                           nsteps_per_iteration=1,
                           verbose=True)
        model_dir = os.path.join(default_project_dirnames.models, targetid,
                                 templateid)
        explicit_model_filepath = os.path.join(model_dir,
                                               'explicit-refined.pdb.gz')
        explicit_energies_filepath = os.path.join(model_dir,
                                                  'explicit-energies.txt')
        explicit_log_filepath = os.path.join(model_dir, 'explicit-log.yaml')

        assert all(
            map(os.path.exists, [
                explicit_model_filepath, explicit_energies_filepath,
                explicit_log_filepath
            ]))
        with open(explicit_log_filepath) as explicit_log_file:
            # safe_load: yaml.load without an explicit Loader is rejected by
            # current PyYAML and can construct arbitrary objects on older ones.
            explicit_log = yaml.safe_load(explicit_log_file)
        assert explicit_log.get('finished') is True
        assert explicit_log.get('successful') is True
        # Smoke check: the refined model must be readable as a PDB.
        mdtraj.load_pdb(explicit_model_filepath)
def metal_searcher(file):
    """Collect the names of atoms whose name equals their residue name.

    Returns a dict with keys:
      * 'one_atom_residue_names': list of matching atom names (as str),
      * 'ok_file_count': 1 if mdtraj loaded the file, else 0,
      * 'error_file_count': 1 if loading failed, else 0.
    When loading fails the list is left empty.
    """
    metal_searcher_results = {
        'one_atom_residue_names': [],
        'ok_file_count': 0,
        'error_file_count': 0,
    }

    print(file)

    try:
        traj = md.load_pdb(file)
    except Exception:
        # Any loading problem marks the file as failed. `Exception` rather
        # than a bare except so Ctrl-C / SystemExit are not swallowed.
        metal_searcher_results['error_file_count'] = 1
        return metal_searcher_results
    metal_searcher_results['ok_file_count'] = 1

    metal_searcher_results['one_atom_residue_names'].extend(
        str(atom.name) for atom in traj.topology.atoms
        if atom.name == atom.residue.name)

    print(metal_searcher_results)
    return metal_searcher_results
Beispiel #38
0
def fasta_dict(fasta_dir, prot_ids):
    """Map each protein id to its FASTA sequence, generating FASTA files on demand.

    For every id ``p`` the file ``<fasta_dir>/<p>.fasta`` is read with
    ``get_fasta_seq``. When that file is missing but
    ``<fasta_dir>/<p>_protein.pdb`` exists, the FASTA file is first created
    from the PDB topology. Ids for which no FASTA file can be found or
    created are mapped to ``"X"``.
    """
    fasta_seq_dict = {}
    for p in prot_ids:
        pdb_fn = os.path.join(fasta_dir, p + "_protein.pdb")
        fn = os.path.join(fasta_dir, p + ".fasta")

        # Convert only when the PDB is available and no FASTA exists yet;
        # the output goes to the .fasta file, never over the PDB itself.
        if os.path.exists(pdb_fn) and not os.path.exists(fn):
            try:
                traj = mt.load_pdb(pdb_fn)
                fasta = "".join(traj.topology.to_fasta())
                print("Converting PDB to fasta")
                with open(fn, 'w') as tof:
                    tof.write(">%s\n" % p)
                    tof.write("%s\n" % fasta)
            except Exception:
                # Best-effort conversion: fall through to the "X" placeholder.
                print("Doesn't find pdb or fasta file......")

        if os.path.exists(fn):
            fasta_seq_dict[p] = get_fasta_seq(fn)
        else:
            print("fasta file not exists: ", fn)
            fasta_seq_dict[p] = "X"

    return fasta_seq_dict
def loader(file):
    """Print ``file`` and report whether mdtraj can load it as a PDB.

    Returns ``file`` when loading fails and None when it succeeds.
    """
    print(file)
    try:
        md.load_pdb(file)
    except Exception:
        # Any loading error flags the file; `Exception` rather than a bare
        # except so KeyboardInterrupt / SystemExit still propagate.
        return file
    return None
Beispiel #40
0
def test_1():
    """Generate one DSSP check per bundled structure.

    For each file the structure is reduced to its 'minimal' atom selection
    and a callable comparing ``call_dssp`` with ``md.compute_dssp`` is
    yielded, labelled through its ``description`` attribute.
    """
    for fn in ['1bpi.pdb', '1vii.pdb', '4K6Q.pdb', '1am7_protein.pdb']:
        t = md.load_pdb(get_fn(fn))
        t = t.atom_slice(t.top.select_atom_indices('minimal'))

        # The default argument freezes this iteration's trajectory; without
        # it every check would see whatever `t` holds when it is finally
        # called (late-binding closure).
        def f(t=t):
            assert_(call_dssp(t), md.compute_dssp(t, simplified=False)[0])

        f.description = 'test_1: %s' % fn
        yield f
Beispiel #41
0
def metal_searcher(file):
    """List atoms of one PDB file whose name is identical to their residue name.

    Returns a dict with keys:
      * 'one_atom_residue_names': names (str) of the matching atoms,
      * 'ok_file_count': 1 when the file was loaded by mdtraj, else 0,
      * 'error_file_count': 1 when loading raised, else 0.
    """
    metal_searcher_results = {
        'one_atom_residue_names': [],
        'ok_file_count': 0,
        'error_file_count': 0,
    }

    print(file)

    try:
        traj = md.load_pdb(file)
    except Exception:
        # A file that cannot be parsed is only counted as an error.
        # Catching `Exception` keeps Ctrl-C and SystemExit working.
        metal_searcher_results['error_file_count'] = 1
        return metal_searcher_results
    metal_searcher_results['ok_file_count'] = 1

    for atom in traj.topology.atoms:
        if atom.name == atom.residue.name:
            metal_searcher_results['one_atom_residue_names'].append(
                str(atom.name))

    print(metal_searcher_results)
    return metal_searcher_results
Beispiel #42
0
def test_solvate_existing_structure_protocol():
    """Tests solvating a single methanol molecule in water."""

    import mdtraj

    methanol = Component("CO")

    solute = Substance()
    solute.add_component(methanol, ExactAmount(1))

    solvent = Substance()
    solvent.add_component(Component("O"), MoleFraction(1.0))

    with tempfile.TemporaryDirectory() as work_dir:

        # Step 1: build coordinates for the single methanol molecule.
        build_step = BuildCoordinatesPackmol("build_methanol")
        build_step.max_molecules = 1
        build_step.substance = solute
        build_step.execute(work_dir, ComputeResources())

        expected_residue_name = build_step.assigned_residue_names[
            methanol.identifier]

        # Step 2: add up to nine water molecules around the existing structure.
        solvate_step = SolvateExistingStructure("solvate_methanol")
        solvate_step.max_molecules = 9
        solvate_step.substance = solvent
        solvate_step.solute_coordinate_file = build_step.coordinate_file_path
        solvate_step.execute(work_dir, ComputeResources())

        solvated = mdtraj.load_pdb(solvate_step.coordinate_file_path)

        assert solvated.n_residues == 10
        assert solvated.top.residue(0).name == expected_residue_name
Beispiel #43
0
def test_refine_explicit_md_short():
    """Integration test: a very short explicit-solvent refinement completes.

    After running ``refine_explicit_md`` for one target/template pair, the
    refined model, energies file and log must exist, the log must flag the
    run as finished and successful, and the model must load with mdtraj.
    """
    with integrationtest_context(set_up_project_stage='solvated'):
        targetid = 'EGFR_HUMAN_D0'
        templateid = 'KC1D_HUMAN_D0_4KB8_D'
        refine_explicit_md(
            process_only_these_targets=[targetid],
            process_only_these_templates=[templateid],
            sim_length=2.0*unit.femtosecond,
            nsteps_per_iteration=1,
            verbose=True
        )
        explicit_model_filepath = os.path.join(
            default_project_dirnames.models, targetid, templateid, 'explicit-refined.pdb.gz'
        )
        explicit_energies_filepath = os.path.join(
            default_project_dirnames.models, targetid, templateid, 'explicit-energies.txt'
        )
        explicit_log_filepath = os.path.join(
            default_project_dirnames.models, targetid, templateid, 'explicit-log.yaml'
        )

        assert all(map(
            os.path.exists,
            [explicit_model_filepath, explicit_energies_filepath, explicit_log_filepath]
        ))
        with open(explicit_log_filepath) as explicit_log_file:
            # yaml.load needs an explicit Loader on current PyYAML and is
            # unsafe without one on older releases; safe_load covers both.
            explicit_log = yaml.safe_load(explicit_log_file)
        assert explicit_log.get('finished') is True
        assert explicit_log.get('successful') is True
        # Loading the model is itself the check: it must parse as a PDB.
        mdtraj.load_pdb(explicit_model_filepath)
Beispiel #44
0
    def __init__(self, PDB_filename, free_energy, expdata_filename=None, use_log_normal_distances=False,
                       dloggamma=np.log(1.01), gamma_min=0.2, gamma_max=10.0):
        """Initialize the class.

        INPUTS
        PDB_filename    Path of a PDB file; it is loaded with mdtraj into self.conf
                        and its coordinates are multiplied by 10 (nm -> Angstrom).
                        NOTE: For cases where the structure is an ensemble (say, from clustering)
                        and the modeled NOE distances and coupling constants are averaged,
                        the structure itself can just be a placeholder with the right atom names
                        and numbering

        free_energy     The (reduced) free energy f = beta*F of this conformation

        expdata_filename
                        Optional experimental data file; when not None it is passed
                        to self.load_expdata at the end of initialization.

        use_log_normal_distances
                        Flag to use log-normal distance errors log(d/d0)

        dloggamma, gamma_min, gamma_max
                        Define self.allowed_gamma as
                        exp(arange(log(gamma_min), log(gamma_max), dloggamma)).
        """

        self.PDB_filename = PDB_filename
        self.expdata_filename = expdata_filename
        self.conf = mdtraj.load_pdb(PDB_filename)
        # Convert the coordinates from nm to Angstrom units
        self.conf.xyz = self.conf.xyz*10.0

        # The (reduced) free energy f = beta*F of this structure, as predicted by modeling
        self.free_energy = free_energy

        # Flag to use log-normal distance errors log(d/d0)
        self.use_log_normal_distances = use_log_normal_distances

        # Store info about gamma^(-1/6) scaling  parameter array
        self.dloggamma = dloggamma
        self.gamma_min = gamma_min
        self.gamma_max = gamma_max
        self.allowed_gamma = np.exp(np.arange(np.log(self.gamma_min), np.log(self.gamma_max), self.dloggamma))

        # Store distance restraint info
        self.distance_restraints = []
        self.distance_equivalency_groups = {}
        self.ambiguous_groups = []  # list of pairs of group indices, e.g.:   [ [[1,2,3],[4,5,6]],   [[7],[8]], ...]
        self.ndistances = 0

        # Store dihedral restraint info
        self.dihedral_restraints = []
        self.dihedral_equivalency_groups = {}
        self.dihedral_ambiguity_groups = {}
        self.ndihedrals = 0

        # Create a KarplusRelation object
        self.karplus = KarplusRelation()

        # variables to store pre-computed SSE and effective degrees of freedom (d.o.f.)
        # one SSE slot (initially 0.0) per allowed gamma value
        self.sse_distances = np.zeros(len(self.allowed_gamma))
        self.Ndof_distances = None
        self.sse_dihedrals = None
        self.Ndof_dihedrals = None
        self.betas = None   # if reference is used, an array of N_j betas for each distance
        self.neglog_reference_priors = None
        self.sum_neglog_reference_priors = 0.0

        # If an experimental data file is given, load in the information
        if expdata_filename is not None:
            self.load_expdata(expdata_filename)
Beispiel #45
0
def test_topology(get_fn):
    """Round-trip a topology through an HDF5 trajectory file.

    The topology read from ``native.pdb`` is stored in the file at ``temp``
    (a name defined elsewhere in this module) and must compare equal once
    the file is opened again.
    """
    expected = md.load_pdb(get_fn('native.pdb')).topology

    # Write phase: attach the topology to a file opened in 'w' mode.
    with HDF5TrajectoryFile(temp, 'w') as writer:
        writer.topology = expected

    # Read phase: reopen without an explicit mode and compare.
    with HDF5TrajectoryFile(temp) as reader:
        stored = reader.topology
        assert stored == expected
Beispiel #46
0
def test_load_trajectory(get_fn):
    """Check that ``frame0.tng`` matches the PDB file it was created from."""
    reference = md.load_pdb(get_fn('frame0.pdb'))
    # The TNG file is loaded with the topology taken from the PDB.
    candidate = md.load_tng(get_fn('frame0.tng'), top=reference.topology)

    # Frame count, unit cell vectors and coordinates must all agree.
    for attribute in ('n_frames', 'unitcell_vectors', 'xyz'):
        eq(getattr(reference, attribute), getattr(candidate, attribute))
Beispiel #47
0
def test_3nch_conect():
    """Verify the bonds read from a PDB whose CONECT entries use all available digits.

    Such a file is a good failure case for the CONECT handling.
    """
    traj = load_pdb(get_fn('3nch.pdb.gz'))
    _, bond_rows = traj.top.to_dataframe()
    # Index the bonds by their (atom, atom) pair for direct lookup.
    bonded = {(a, b): 1 for (a, b) in bond_rows}

    # Check that the last SO4 molecule has the right bonds: atom 19782 must be
    # bonded to each of the four atoms that follow it.
    for partner in (19783, 19784, 19785, 19786):
        eq(bonded[19782, partner], 1)
Beispiel #48
0
 def _store_highest_seqid_model(self):
     """Pick a reference model and load it as ``self.ref_model_traj``.

     Template ids are visited in the order given by sorting ``self.df`` on
     its ``seqid`` column. The first template whose model file exists under
     ``self.models_target_dir`` becomes the reference: its id and path are
     stored as ``self.ref_modelid`` and ``self.ref_model_filepath``, and the
     file is then loaded with ``mdtraj.load_pdb``.
     """
     # DataFrame.sort() was removed from pandas; sort_values() is the
     # replacement and keeps the same default (ascending) order.
     # NOTE(review): ascending order visits the lowest seqid first, while the
     # method name says "highest" - confirm whether ascending=False is wanted.
     models_sorted = self.df.sort_values('seqid').templateid
     for modelid in models_sorted:
         model_filepath = os.path.join(self.models_target_dir, modelid, self.model_filename)
         if os.path.exists(model_filepath):
             self.ref_modelid = modelid
             self.ref_model_filepath = model_filepath
             break
     # NOTE(review): if no model file exists, ref_model_filepath is not
     # assigned by this method and the line below fails on the attribute.
     self.ref_model_traj = mdtraj.load_pdb(self.ref_model_filepath)
Beispiel #49
0
 def _align_structures(self):
     """Align every structure against the first structure of its row.

     ``self.structure_filepaths`` is treated as a flat, row-major grid of
     ``self.nrows`` x ``self.ncols`` entries. For each row the first entry is
     loaded as ``self._ref_traj``, the atoms matching ``'not name H'`` are
     stored as ``self._heavy_atoms``, and ``self._align_structure`` is then
     called for every entry of that row (the first one included).
     """
     for row in range(self.nrows):
         row_start = row * self.ncols
         # The first column of the row provides the reference structure.
         self._ref_traj = mdtraj.load_pdb(self.structure_filepaths[row_start])
         self._heavy_atoms = self._ref_traj.topology.select('not name H')
         for flat_index in range(row_start, row_start + self.ncols):
             self._align_structure(self.structure_filepaths[flat_index], flat_index)
Beispiel #50
0
    def __init__(self, PDB_filename, lam, free_energy, data=None,
             use_log_normal_noe=False, dloggamma=np.log(1.01), gamma_min=0.2,
             gamma_max=10.0):
        """Set up the restraint bookkeeping for a single conformation.

        INPUTS
        PDB_filename          A topology file (*.pdb), loaded as ``self.conf``
        lam                   lambda value; multiplies ``free_energy``
        free_energy           The (reduced) free energy f = beta*F of this conformation
        data                  input data, stored unchanged as ``self.data``
        use_log_normal_noe    flag to use log-normal distance errors log(d/d0)
        dloggamma             step of the gamma grid, in log space
        gamma_min             lowest value of the gamma grid
        gamma_max             upper limit of the gamma grid (not included)
        """

        # Conformation: load the PDB and rescale the coordinates from nm
        # to Angstrom units.
        self.PDB_filename = PDB_filename
        self.data = data
        self.conf = mdtraj.load_pdb(PDB_filename)
        self.conf.xyz = 10.0 * self.conf.xyz

        # Reduced free energy of this structure, scaled by lambda.
        self.free_energy = lam * free_energy

        # Restraint containers and summary statistics.
        self.restraints = []   # one data container object per restraint
        self.n = 0
        self.sse = 0
        self.Ndof = None

        # Exponential reference potential.
        self.betas = None
        self.neglog_exp_ref = None
        self.sum_neglog_exp_ref = 0.0

        # Gaussian reference potential.
        self.ref_sigma = None
        self.ref_mean = None
        self.neglog_gau_ref = None
        self.sum_neglog_gau_ref = 0.0

        # Grid of allowed gamma values, evenly spaced in log space.
        self.dloggamma = dloggamma
        self.gamma_min = gamma_min
        self.gamma_max = gamma_max
        log_gamma_grid = np.arange(np.log(self.gamma_min),
                                   np.log(self.gamma_max),
                                   self.dloggamma)
        self.allowed_gamma = np.exp(log_gamma_grid)

        # Flag to use log-normal distance errors log(d/d0).
        self.use_log_normal_noe = use_log_normal_noe

        # Karplus relation helper object.
        self.karplus = KarplusRelation()
Beispiel #51
0
def get_renumbered_topol_resnums(target):
    """Collect residue numbers from the renumbered topology PDBs of a target.

    Looks in the models directory of ``target.id`` for
    ``topol-renumbered-implicit.pdb`` and ``topol-renumbered-explicit.pdb``.
    Each file that exists is loaded and the ``resSeq`` of every residue is
    recorded.

    Returns a dict mapping 'implicit' and/or 'explicit' to the list of
    residue numbers; a type whose file is missing has no entry.
    """
    target_dir = os.path.join(default_project_dirnames.models, target.id)
    resnums_by_type = {}
    for topol_type in ('implicit', 'explicit'):
        pdb_name = 'topol-renumbered-{}.pdb'.format(topol_type)
        pdb_path = os.path.join(target_dir, pdb_name)
        if os.path.exists(pdb_path):
            topology = mdtraj.load_pdb(pdb_path).top
            resnums_by_type[topol_type] = [residue.resSeq for residue in topology.residues]
            logger.info(
                'Will use renumbered residues from {} for target {}'.format(pdb_path, target.id)
            )
    return resnums_by_type
Beispiel #52
0
def add_bonds_to_pdb(infile, topfile):
    """Rewrite ``infile`` with only its first residue and bonds from ``topfile``.

    Files that contain exactly one residue are left untouched. Otherwise the
    trajectory is cut down to the atoms of the first residue, the bond list
    of the molecule type parsed from the GROMACS topology is attached, and
    the result is saved back over ``infile``.
    """
    traj = md.load_pdb(infile)
    if traj.n_residues == 1:
        return

    # Keep atoms 0 .. n-1, where n is the atom count of the first residue.
    n_first = traj.top.residue(0).n_atoms
    traj.restrict_atoms(range(n_first))

    molecule_type = app.GromacsTopFile(topfile)._currentMoleculeType
    atom_table, _ = traj.top.to_dataframe()
    pairs = [(row[0], row[1]) for row in molecule_type.bonds]
    # Shift the indices down by one (presumably 1-based in the topology
    # file and 0-based in the data frame - TODO confirm).
    zero_based = np.array(pairs, 'int') - 1
    traj.top = md.Topology.from_dataframe(atom_table, zero_based)
    traj.save(infile)
Beispiel #53
0
def haszn(i):
    """Append ``i`` to ``znpdbs`` when any atom label of the PDB contains "ZN".

    Parameters
    ----------
    i : str
        Path of the PDB file to inspect.

    Returns
    -------
    None
        Always; the result is recorded in the module-level ``znpdbs`` list.
        A file that cannot be loaded is skipped.
    """
    try:
        traj = md.load_pdb(i)
    except Exception:
        # Best-effort scan: unreadable files are skipped. Catching Exception
        # instead of using a bare except lets KeyboardInterrupt and
        # SystemExit propagate.
        return None

    # Substring match on the string form of each atom, as before.
    if any("ZN" in str(atom) for atom in traj.topology.atoms):
        znpdbs.append(i)
    return None
def metal_scanner(file):
    """Report whether the PDB at ``file`` has an atom named 'ZN'.

    Parameters
    ----------
    file : str
        Path of the PDB file to inspect.

    Returns
    -------
    bool or None
        True when at least one atom is named 'ZN', False when none is, and
        None when the file could not be loaded. The boolean result is also
        printed.
    """
    try:
        traj = md.load_pdb(file)
    except Exception:
        # Best-effort scan: unreadable files yield None. Catching Exception
        # instead of using a bare except lets KeyboardInterrupt and
        # SystemExit propagate.
        return None

    contains_metal = any(atom.name == 'ZN' for atom in traj.top.atoms)

    print(contains_metal)
    return contains_metal
def metal_scanner(file):
    """Report whether the PDB at ``file`` has a 'ZN' atom in a 'ZN' residue.

    Parameters
    ----------
    file : str
        Path of the PDB file to inspect.

    Returns
    -------
    list
        ``[flag, file]`` where ``flag`` is True when an atom named 'ZN'
        belongs to a residue named 'ZN', False when there is none, and None
        when the file could not be loaded. A successful result is also
        printed.
    """
    try:
        traj = md.load_pdb(file)
    except Exception:
        # Best-effort scan: unreadable files are reported with a None flag.
        # Catching Exception instead of using a bare except lets
        # KeyboardInterrupt and SystemExit propagate.
        return [None, file]

    found = any(
        atom.name == 'ZN' and atom.residue.name == 'ZN'
        for atom in traj.top.atoms
    )
    contains_metal = [found, file]

    print(contains_metal)
    return contains_metal
def file_reader(file):
    """Try to load the PDB at ``file`` and count the outcome.

    ``ok_file_count``, ``error_file_count`` and ``lock`` are defined
    elsewhere in the module (presumably shared counters and their lock -
    TODO confirm). Only their ``.value`` attribute is modified here, so no
    ``global`` declaration is needed.

    Parameters
    ----------
    file : str
        Path of the PDB file to load.
    """
    try:
        md.load_pdb(file)
    except Exception:
        # Catching Exception instead of using a bare except lets
        # KeyboardInterrupt and SystemExit propagate.
        with lock:
            error_file_count.value += 1
    else:
        # Counted outside the try body so that a failure while updating the
        # counter is not recorded as a load error.
        with lock:
            ok_file_count.value += 1

    print(file)
    print(ok_file_count.value)
Beispiel #57
0
    def __init__(self, PDB_filename, ref, dlogsigma=np.log(1.02),
            sigma_min=0.05, sigma_max=20.0, use_global_ref_sigma=True):
        """Initialize the Restraint class.

        :param str PDB_filename: A topology file (*.pdb), loaded as ``self.conf``
        :param str ref: Reference potential.
        :param float dlogsigma: step of the sigma grid in log space,
            default = np.log(1.02)
        :param float sigma_min: lowest sigma of the grid, default = 0.05
        :param float sigma_max: upper limit of the grid (not included),
            default = 20.0
        :param bool use_global_ref_sigma: stored unchanged, default = True
        """

        # Store restraint info
        self.restraints = []   # a list of data container objects for each restraint (e.g. NMR_Chemicalshift_Ca())

        # Conformational Information
        self.PDB_filename = PDB_filename
        self.conf = mdtraj.load_pdb(PDB_filename)

        # Convert the coordinates from nm to Angstrom units
        self.conf.xyz = self.conf.xyz*10.0

        # used for exponential reference potential
        self.betas = None
        self.neglog_exp_ref = None
        self.sum_neglog_exp_ref = 0.0

        # used for Gaussian reference potential
        self.ref_sigma = None
        self.ref_mean = None
        self.neglog_gaussian_ref = None
        self.sum_neglog_gaussian_ref = 0.0
        self.use_global_ref_sigma = use_global_ref_sigma
        self.see = None

        # Storing the reference potential
        self.ref = ref

        # set sigma range: a grid evenly spaced in log space
        self.dlogsigma = dlogsigma
        self.sigma_min = sigma_min
        self.sigma_max = sigma_max
        self.allowed_sigma = np.exp(np.arange(np.log(self.sigma_min),
            np.log(self.sigma_max), self.dlogsigma))

        # Start at the middle of the grid. Floor division keeps the index an
        # integer; with "/" Python 3 produces a float, which cannot be used
        # to index the array.
        self.sigma_index = len(self.allowed_sigma) // 2
        self.sigma = self.allowed_sigma[self.sigma_index]
Beispiel #58
0
    def __init__(self, PDB_filename, ref, dlogsigma=np.log(1.02), sigma_min=0.05, sigma_max=20.0, use_global_ref_sigma=True):
        """Initialize the Restraint class.

        INPUTS
        ------

        PDB_filename            A topology file (*.pdb), loaded as self.conf
        ref                     Reference potential.
        dlogsigma               step between consecutive allowed sigma values
        sigma_min               lowest allowed sigma
        sigma_max               upper limit of the allowed sigma (not included)
        use_global_ref_sigma    stored unchanged
        """

        # Store restraint info
        self.restraints = []   # a list of data container objects for each restraint (e.g. NMR_Chemicalshift_Ca())

        # Conformational Information
        self.PDB_filename = PDB_filename
        self.conf = mdtraj.load_pdb(PDB_filename)

        # Convert the coordinates from nm to Angstrom units
        self.conf.xyz = self.conf.xyz*10.0

        # used for exponential reference potential
        self.betas = None
        self.neglog_exp_ref = None
        self.sum_neglog_exp_ref = 0.0

        # used for Gaussian reference potential
        self.ref_sigma = None
        self.ref_mean = None
        self.neglog_gaussian_ref = None
        self.sum_neglog_gaussian_ref = 0.0
        self.use_global_ref_sigma = use_global_ref_sigma
        self.see = None

        # Storing the reference potential
        self.ref = ref

        # set sigma range
        self.dlogsigma = dlogsigma
        self.sigma_min = sigma_min
        self.sigma_max = sigma_max
        # The grid is linear here: dlogsigma is used directly as the step.
        # NOTE(review): the parameter name suggests a log-space step -
        # confirm that linear spacing is intended.
        self.allowed_sigma = np.arange(self.sigma_min,self.sigma_max,self.dlogsigma)

        # Start at the middle of the grid. Floor division keeps the index an
        # integer; with "/" Python 3 produces a float, which cannot be used
        # to index the array.
        self.sigma_index = len(self.allowed_sigma) // 2
        self.sigma = self.allowed_sigma[self.sigma_index]
Beispiel #59
0
def test_refine_implicit_md_short():
    """Run a minimal implicit-solvent refinement and check its output files.

    One target/template pair is refined for a single short iteration inside
    an integration-test project set up at the 'clustered' stage. The test
    then checks that the expected files exist, that the log and metadata
    YAML files hold the expected values, and that the refined model
    contains the atom 'ASP50-HD2'.
    """
    with integrationtest_context(set_up_project_stage='clustered'):
        targetid = 'EGFR_HUMAN_D0'
        templateid = 'KC1D_HUMAN_D0_4KB8_D'
        # Keep the run as short as possible: one step per iteration and one
        # minimization step.
        refine_implicit_md(
            process_only_these_targets=[targetid],
            process_only_these_templates=[templateid],
            sim_length=2.0*unit.femtosecond,
            nsteps_per_iteration=1,
            minimization_steps=1,
            loglevel='debug'
        )
        # Paths of the files inspected below, all under the models directory
        # of the target.
        implicit_metadata_filepath = os.path.join(
            default_project_dirnames.models, targetid, 'refine_implicit_md-meta0.yaml'
        )
        implicit_model_filepath = os.path.join(
            default_project_dirnames.models, targetid, templateid, 'implicit-refined.pdb.gz'
        )
        implicit_energies_filepath = os.path.join(
            default_project_dirnames.models, targetid, templateid, 'implicit-energies.txt'
        )
        implicit_log_filepath = os.path.join(
            default_project_dirnames.models, targetid, templateid, 'implicit-log.yaml'
        )

        # The model, energies and log files must all exist.
        assert all(map(
            os.path.exists,
            [implicit_model_filepath, implicit_energies_filepath, implicit_log_filepath]
        ))
        # NOTE(review): yaml.load is called without a Loader argument here
        # and below; recent PyYAML releases require one - consider
        # yaml.safe_load if the files hold only plain data.
        with open(implicit_log_filepath) as implicit_log_file:
            implicit_log = yaml.load(implicit_log_file)
        assert implicit_log.get('finished') is True
        assert implicit_log.get('successful') is True
        assert implicit_log.get('ph') == 8.0
        assert os.path.exists(implicit_metadata_filepath)
        with open(implicit_metadata_filepath) as implicit_metadata_file:
            implicit_metadata = yaml.load(implicit_metadata_file)
        # The metadata must record the custom residue variant for the target.
        assert implicit_metadata.get('refine_implicit_md').get('custom_residue_variants') == {
            'EGFR_HUMAN_D0': {49: 'ASH'}
        }
        # The residue at list index 49 of the refined model must contain an
        # atom whose string form is 'ASP50-HD2'.
        implicit_model_traj = mdtraj.load_pdb(implicit_model_filepath)
        resis = [resi for resi in implicit_model_traj.top.residues]
        resi49 = resis[49]
        resi49_atom_strings = [str(atom) for atom in resi49.atoms]
        assert 'ASP50-HD2' in resi49_atom_strings