Example #1
0
def test_trajectory_rmsf(get_fn):
    """Check ``md.rmsf`` against a NumPy reference for both parallel settings.

    The trajectory is loaded from ``traj.h5`` (resolved through the ``get_fn``
    fixture), RMSF is computed with frame 0 as the reference, and the result is
    compared with a value derived directly from the superposed coordinates.
    """
    t = md.load(get_fn('traj.h5'))
    for use_parallel in (True, False):
        calculated = md.rmsf(t, t, 0, parallel=use_parallel)

        # Build the expected value by hand from the aligned coordinates.
        t.superpose(t, 0)
        mean_xyz = np.average(t.xyz, axis=0)
        squared_dev = (t.xyz - mean_xyz)**2
        # The mean runs over frames and the three Cartesian components, so
        # multiplying by 3 gives the per-atom mean squared displacement.
        reference = np.sqrt(3*np.mean(squared_dev, axis=(0, 2)))

        # Guard against the degenerate "all zeros" result.
        assert np.sum(np.abs(calculated)) > 0
        eq(calculated, reference, decimal=3)
Example #2
0
    def group_selection(traj_universe, reference_universe, selection,
                        group_selection):
        """Return the RMSF (scaled by 10) of one sub-selection of atoms.

        The atoms matching ``selection`` AND ``group_selection`` are looked up
        in ``topology`` (a name taken from the enclosing scope), both
        trajectories are reduced to those atoms, the trajectory is superposed
        on the reference, and ``md.rmsf`` of the result is multiplied by 10.
        """
        # One query string and one index lookup serve both trajectories.
        combined_query = selection + ' and ' + group_selection
        atom_ids = topology.select(combined_query)

        new_ref = reference_universe.atom_slice(atom_ids)
        new_traj = traj_universe.atom_slice(atom_ids)
        new_traj.superpose(reference=new_ref, parallel=True)
        print(new_traj)

        fluctuations = md.rmsf(new_traj, new_ref, parallel=True)
        return fluctuations * 10
Example #3
0
def test_trajectory_rmsf_aligned(get_fn):
    """Check ``md.rmsf`` on a pre-aligned trajectory for both parallel settings.

    The first half of the atoms is used for the superposition and the second
    half for the RMSF itself, so alignment atoms and measured atoms differ.
    ``md.rmsf`` is called with ``None`` as the reference.
    """
    t = md.load(get_fn('traj.h5'))
    half = int(t.n_atoms/2)
    # testing different set of atoms for alignment and RMSF calculation
    atom_indices = range(half)
    rmsf_indices = range(half, t.n_atoms)

    for use_parallel in (True, False):
        t.superpose(t, 99, atom_indices=atom_indices, parallel=False)
        calculated = md.rmsf(t, None, atom_indices=rmsf_indices,
                             parallel=use_parallel)

        # Expected value computed by hand, restricted to the measured atoms.
        mean_xyz = np.average(t.xyz, axis=0)
        squared_dev = (t.xyz - mean_xyz)**2
        per_atom = np.sqrt(3*np.mean(squared_dev, axis=(0, 2)))
        reference = per_atom[rmsf_indices]

        # Guard against the degenerate "all zeros" result.
        assert np.sum(np.abs(calculated)) > 0
        eq(calculated, reference, decimal=3)
Example #4
0
    def rmsf_calculation(system_specs, specs):
        """
        Compute the per-atom RMSF of a selection and return it as a DataFrame.

        The trajectory is loaded through ``Trajectory.Trajectory.loadTrajectory``,
        reduced in place to the atoms matching ``selection``, centered, and then
        the frames ``start:stop:stride`` are passed to ``md.rmsf`` with frame 0
        as the reference.

        Parameters
        ----------
        system_specs : tuple
            ``(trajectory, topology, results_folder, name)``. Must contain
            exactly four items; it is forwarded as-is to the loader and to
            ``Featurize.df_template``.
        specs : tuple
            ``(selection, start, stop, timestep, stride, units_x, units_y,
            task, store_traj, subset)``. Must contain exactly ten items. Only
            ``selection``, ``start``, ``stop``, ``stride`` and ``units_y`` are
            used directly here.

        Returns
        -------
        pandas.DataFrame
            One row per selected atom (index named ``'Index'``) with the
            column MultiIndex built from ``Featurize.df_template``. An empty
            DataFrame is returned when the loader yields ``None``.

        """

        # Full unpacking is kept on purpose: it rejects malformed spec tuples.
        (trajectory, topology, results_folder, name) = system_specs
        (selection, start, stop, timestep, stride, units_x, units_y, task,
         store_traj, subset) = specs
        # The third value returned by the template is rebuilt below, so it is
        # discarded here.
        names, indexes, _ = Featurize.df_template(system_specs,
                                                  unit=[units_y])
        traj = Trajectory.Trajectory.loadTrajectory(system_specs, specs)

        if traj is None:
            return pd.DataFrame()

        atom_indices = traj.topology.select(selection)
        traj.atom_slice(atom_indices, inplace=True)
        # Centering up front is what makes ``precentered=True`` valid below.
        traj.center_coordinates()

        # Slice once and reuse the same frames as target and reference.
        frames = traj[start:stop:stride]
        rmsf = md.rmsf(frames, frames, 0, precentered=True)

        rows = pd.Index(np.arange(0, len(atom_indices)), name='Index')
        column_index = pd.MultiIndex.from_product(indexes, names=names)

        return pd.DataFrame(rmsf, columns=column_index, index=rows)
Example #5
0
def cal_rmsf_traj(topologyfile: str,
                  trajfile: str,
                  selection='mass >= 2',
                  mode='residue',
                  outfile=None) -> pd.DataFrame:
    """Calculate the root mean square fluctuation for the trajfile.

    The trajectory is reduced to the atoms matching ``selection`` and
    ``md.rmsf`` is evaluated with frame 0 of that reduced trajectory as the
    reference. Chain names are taken from the segment ids that MDAnalysis
    reads from ``topologyfile`` and are looked up by mdtraj chain index.
    NOTE(review): this assumes both libraries enumerate chains/segments in the
    same order, also after the atom selection -- confirm for your inputs.

    Args:
        topologyfile (str): topology file for the trajectory
        trajfile (str): molecular dynamic simulation trajectory file.
        selection (str, optional): mdtraj selection of the atoms used for the
            RMSF. Defaults to 'mass >= 2'.
        mode (str, optional): calculation mode, 'atom' or 'residue'. In
            'residue' mode the per-atom values are averaged per
            (chain, resid, resname). Defaults to 'residue'.
        outfile (str, optional): default is None, CSV file to save result.
    Raises:
        ValueError: mode is neither 'residue' nor 'atom'. Raised before any
            file is read.
    Returns:
        pd.DataFrame: RMSF values. Columns are chain, resid, resname, atomid,
            AtomName, RMSF in 'atom' mode and chain, resid, resname, RMSF in
            'residue' mode.
    """
    # Validate the cheap argument first so a typo does not cost a full
    # trajectory load and RMSF computation.
    if mode not in ('residue', 'atom'):
        raise ValueError('Unsupport mode: %s, only accept residue or atom.' %
                         mode)

    chain_names = mda.Universe(topologyfile).segments.segids
    traj = md.load(trajfile, top=topologyfile)
    selected = traj.atom_slice(traj.topology.select(selection))
    # target, reference, frame of reference
    rmsf = md.rmsf(selected, selected, 0)

    # Index the RMSF array by the atom's own index instead of a running
    # counter, so the pairing does not depend on the iteration order.
    records = [
        (chain_names[chain.index], atom.residue.resSeq, atom.residue.name,
         atom.serial, atom.name, rmsf[atom.index])
        for chain in selected.topology.chains
        for atom in chain.atoms
    ]
    column_names = ['chain', 'resid', 'resname', 'atomid', 'AtomName', 'RMSF']
    rmsf_df = pd.DataFrame(records, columns=column_names)

    if mode == 'residue':
        rmsf_df = rmsf_df.groupby(by=['chain', 'resid', 'resname'],
                                  as_index=False).agg({'RMSF': 'mean'})
    if outfile:
        rmsf_df.to_csv(outfile, index=False)
    return rmsf_df
Example #6
0
def calc_com(traj_file, top_file, bb=False):
    '''
       Return per-residue center-of-mass trajectories and RMSF values.

       The trajectory is loaded as given; per the original note the input is
       expected to already be free of water and ions (nothing is stripped
       here).

       traj_file: trajectory file passed to md.load
       top_file:  topology file passed to md.load as `top`
       bb:        if truthy use the backbone atoms of each residue,
                  otherwise only its CA atom

       Returns (com_data, rmsf_data), two lists with one entry per residue
       index 0..n_residues-1:
         com_data[i]  - md.compute_center_of_mass of the residue's selected
                        atoms over the trajectory
         rmsf_data[i] - RMSF values of those atoms (for normalization), taken
                        from md.rmsf over all atoms with frame 0 as reference
    '''
    traj = md.load(traj_file, top=top_file)
    top = traj.topology
    # RMSF is computed once for every atom and indexed per residue below.
    rmsf = md.rmsf(traj, traj, 0)

    # The atom filter does not depend on the residue, so pick it once.
    atom_filter = 'backbone' if bb else 'name CA'

    com_data = []
    rmsf_data = []
    for i in range(traj.n_residues):
        atom_ids = top.select(atom_filter + ' and resid ' + str(i))
        rmsf_data.append(rmsf[atom_ids])
        com_data.append(md.compute_center_of_mass(traj.atom_slice(atom_ids)))
    return com_data, rmsf_data
Example #7
0
def compute_rmsf(traj_selection):
    """Return ``md.rmsf`` of ``traj_selection`` against itself.

    The trajectory is centered first by calling its ``center_coordinates()``
    method, which is why ``precentered=True`` can be passed to ``md.rmsf``
    (the original author notes the docs describe this as the faster path).
    """
    # precentered=True is only valid because of this call.
    traj_selection.center_coordinates()
    return md.rmsf(traj_selection, traj_selection, precentered=True)
Example #8
0
        # Scale the coordinates by 10 (presumably nm -> Angstrom; TODO confirm
        # the units of trj.xyz) and keep only the atoms listed in cb_idx.
        coords_k = trj.xyz * 10
        coords_cb_k = coords_k[:, cb_idx, :]
        coords_cb_all.append(coords_cb_k)
    # Stack the collected per-trajectory arrays along the first (frame) axis.
    coords_cb_all = np.vstack(coords_cb_all)
    # score all frames
    score_list = []
    for i in tqdm(range(coords_cb_all.shape[0])):
        coords = torch.tensor(coords_cb_all[i], dtype=torch.float, device=device)
        protein = Protein(seq_native, coords, profile_native)
        energy = protein.get_energy(energy_fn).item()
        score_list.append(energy)

    # Wrap the stacked coordinates in a topology-less trajectory, superpose it
    # on its own frame 0, then compute RMSD and RMSF against that frame.
    # NOTE(review): the coordinates were multiplied by 10 above, so rmsd/rmsf
    # come out in those scaled units.
    t = md.Trajectory(xyz=coords_cb_all, topology=None)
    t = t.superpose(t, frame=0)
    rmsd = md.rmsd(t, t, frame=0)
    rmsf = md.rmsf(t, t, frame=0)
    # Per-frame table (energy, rmsd) and per-atom table (rmsf) are written to
    # separate CSV files.
    df = pd.DataFrame({'energy': score_list, 'rmsd': rmsd})
    df.to_csv(f'{root_dir}/BPTI/BPTI_energy_rmsd.csv', index=False)
    df = pd.DataFrame({'rmsf': rmsf})
    df.to_csv(f'{root_dir}/BPTI/BPTI_rmsf.csv', index=False)


if 'val_deep' in md_data_list:
    # NOTE(review): hard-coded, machine-specific data locations.
    root_dir = '/home/hyang/bio/erf/data/decoys/md'
    trj_dir = f'/home/hyang/bio/openmm/data'

    # The header-less `list` file is read as a single column named 'pdb'.
    pdb_id_list = pd.read_csv(f'{trj_dir}/list', header=None, names=['pdb'])['pdb'].values
    # pdb_id_list = ['3KXT']
    for pdb_id in tqdm(pdb_id_list):
        pdb_path = f'{root_dir}/{pdb_id}_A_bead.csv'
        seq_native, coords_native, profile_native = load_protein_bead(pdb_path,'CB', device)
Example #9
0
def get_rmsf_data(top,
                  traj,
                  start_frame,
                  stop_frame,
                  stride,
                  selection,
                  superpose,
                  group_selections=None):
    """
        Make RMSF Calculation with Group Selections with mdtraj.

            The structure loaded from ``top`` is used as the reference. The
            frame window ``start_frame:stop_frame`` is applied right after
            loading, so both the overall RMSF and every per-domain RMSF are
            computed over the same frames. All RMSF values are multiplied by
            10 (mdtraj documents nanometres as its length unit, so this is
            presumably a conversion to Angstrom).

            Parameters
            ----------

            top: topology file

            traj : trajectory file

            start_frame : int or None
                First snapshot (after striding) to include. None means "from
                the beginning".

            stop_frame: int or None
                Snapshot (after striding) at which to stop; exclusive, as in
                Python slicing. None means "to the end".

            stride: int
                It will take snapshots at intervals of the "stride" unit you specified for your analysis

            selection: str
                Normal Selection

            superpose: bool
                Will align Snapshots for best fitting

            group_selections: list or None
                list of domains selections; each one is combined with
                ``selection`` using ``and``. None means no domains.

            Returns
            -------

            tuple or None
                ``(ori_selection_rmsf, selection_rmsf, time, residues)``:

                ori_selection_rmsf : list
                    RMSF of the whole selection.
                selection_rmsf : numpy.ndarray
                    Same values, overwritten from index 0 onwards with the
                    per-domain RMSF arrays in the order given. This assumes
                    the domains are contiguous, ordered and start at the
                    first selected atom.
                time : numpy.ndarray
                    ``time`` attribute of the analysed frames.
                residues : list
                    ``0 .. n_atoms-1`` of the selection.

                On any error the exception is printed and None is returned.

            Example
            ----------

            or_rmsf, domain_rmsf, time, residue_list = get_rmsf_data(top='test/protein.pdb', traj='test/50_frame.dcd',
                                                         start_frame=0, stop_frame=49, stride=1,
                                                         selection='backbone and name CA', superpose=True,
                                                         group_selections=["backbone and name CA and resid 0 to 20",
                                                                           "backbone and name CA and resid 21 to 59",
                                                                           "backbone and name CA and resid 60 to 100",
                                                                           "backbone and name CA and resid 101 to 115",
                                                                           "backbone and name CA and resid 116 to 142"])

        """

    # NOTE(review): the module-level name is still published because other
    # code may read it -- confirm and drop the global if nothing does. It is
    # now assigned on every call, so it can no longer be stale or undefined.
    global groupselections

    try:
        # Slicing accepts None bounds, so one expression covers every
        # combination of start_frame / stop_frame.
        traj_origin = md.load(traj, top=top,
                              stride=stride)[start_frame:stop_frame]
        ref_origin = md.load(top)
        topology = traj_origin.topology

        def domain_rmsf(domain):
            # RMSF of one domain after aligning it on itself only.
            atom_ids = topology.select(selection + ' and ' + domain)
            domain_ref = ref_origin.atom_slice(atom_ids)
            domain_traj = traj_origin.atom_slice(atom_ids)
            domain_traj.superpose(reference=domain_ref, parallel=True)
            return md.rmsf(domain_traj, domain_ref, parallel=True) * 10

        # The same indices (from the trajectory topology) slice both objects.
        # Assumes the reference file has the same atom ordering.
        selected_atoms = topology.select(selection)
        selected_traj = traj_origin.atom_slice(selected_atoms)
        selected_ref = ref_origin.atom_slice(selected_atoms)

        domains = [] if group_selections is None else group_selections
        groupselections = [domain_rmsf(domain) for domain in domains]

        if superpose:
            selected_traj.superpose(reference=selected_ref, parallel=True)

        origin_rmsf = md.rmsf(selected_traj, selected_ref, parallel=True) * 10
        # Snapshot the untouched values before the in-place overwrite below.
        ori_selection_rmsf = list(origin_rmsf)

        selection_rmsf = origin_rmsf
        offset = 0
        for domain_values in groupselections:
            end = offset + len(domain_values)
            selection_rmsf[offset:end] = domain_values
            offset = end

        return (ori_selection_rmsf, selection_rmsf, selected_traj.time,
                list(range(selected_traj.n_atoms)))

    except Exception as err:
        # Deliberate best-effort: report the problem and hand back None.
        print(err)
        print("problem in rmsf calculation")
        return None
for method in methods:
    
    #making a fake combined trajectory where each frame is one centroid of the method
    traj_0 = md.load(path+method+'/'+method+'_0.pdb')
    for x in range(1,10):
        for k in os.listdir(path+method+'/'):
            if k == (method+'_'+str(x)+'.pdb'):
                print(k)
                traj = md.load(path+method+'/'+k)
                joined_traj = traj.join(traj_0)

    joined_traj = joined_traj.superpose(traj_0,0)
    joined_traj.save_pdb(home+'pdb_trajs/'+method+'_traj.pdb')
    
    #calculating the rmsf
    rmsf = (md.rmsf(joined_traj,traj_0)) * 10 #mdtraj calculates in nanometers, so need to convert to angstroms    
    print(len(rmsf))
    
    #replacing the b-factor
    with open(path+'XTAL/XTAL_0.pdb','r') as data:
        with open(home+'pdb_figures/'+method+'_fig.pdb','w') as writefile:
            data = data.readlines()
            
            writefile.write(data[0])
            writefile.write(data[1])
            
            for line,value in zip(data[2:3326],rmsf):
                
                val = round(value,3)
                adjust = str(val).rjust(6)
                newline = line.replace((line[60:67]), adjust)