def test_simplified_codes(self):
    """Check the simplified DSSP codes of structure '1l2y'.

    The structure is obtained with ``pt.fetch_pdb``.  DSSP is run once with
    default settings (result unused, kept as a smoke call) and once with
    ``simplified=True``; the first entry of the simplified per-residue
    codes must match the expected 20-letter pattern.
    """
    trajectory = pt.fetch_pdb('1l2y')

    # Default (non-simplified) run: only exercised, never inspected.
    pt.dssp(trajectory)[1]

    simplified_codes = pt.dssp(trajectory, simplified=True)[1]

    # One single-letter code per residue, spelled out as a string for brevity.
    expected_1st = list('C' + 'H' * 7 + 'CC' + 'H' * 4 + 'C' * 6)

    observed_1st = simplified_codes[0].tolist()
    assert expected_1st == observed_1st, 'test_simplified_codes: must equal'
def test_simplified_codes(self):
    """Check the simplified DSSP codes of the local file "1L2Y.pdb".

    The file path is resolved through ``fn`` and loaded with ``pt.load``.
    DSSP is run once with default settings (result discarded) and once
    with ``simplified=True``; the first entry of the simplified codes must
    match the expected 20-letter pattern.
    """
    pdb_path = fn("1L2Y.pdb")
    trajectory = pt.load(pdb_path)

    # Default run is only exercised; its output is intentionally dropped.
    pt.dssp(trajectory)[1]

    simplified_codes = pt.dssp(trajectory, simplified=True)[1]

    # One single-letter code per residue, spelled out as a string for brevity.
    expected_1st = list('C' + 'H' * 7 + 'CC' + 'H' * 4 + 'C' * 6)

    observed_1st = simplified_codes[0].tolist()
    assert expected_1st == observed_1st, 'test_simplified_codes: must equal'
def test_simplified_codes(self):
    """Verify simplified DSSP output for the structure fetched as '1l2y'.

    Runs ``pt.dssp`` twice on the fetched structure (default, then
    ``simplified=True``) and compares the first entry of the simplified
    codes against a fixed reference list.
    """
    structure = pt.fetch_pdb('1l2y')

    # The default-mode result is not checked; the call is kept so the
    # default code path is still executed by this test.
    pt.dssp(structure)[1]

    codes_simplified = pt.dssp(structure, simplified=True)[1]

    # Reference pattern: coil, 7x helix, 2x coil, 4x helix, 6x coil.
    expected_1st = ['C'] + ['H'] * 7 + ['C'] * 2 + ['H'] * 4 + ['C'] * 6

    first_entry = codes_simplified[0].tolist()
    assert expected_1st == first_entry, 'test_simplified_codes: must equal'
def test_vs_cpptraj(self):
    """Compare integer DSSP datasets of ``self.traj`` with saved cpptraj output.

    Only datasets whose ``dtype`` is ``'integer'`` are kept.  The reference
    file is read with its first row skipped and its first column dropped,
    then transposed so both arrays flatten in the same order.
    """
    datasets = pt.dssp(self.traj, "*", dtype='cpptraj_dataset')

    integer_rows = []
    for dataset in datasets:
        if dataset.dtype == 'integer':
            integer_rows.append(dataset.values)
    data_int = np.array(integer_rows, dtype='i4')

    # load cpptraj output
    reference_table = np.loadtxt("./data/dssp.Tc5b.dat", skiprows=1)
    cpp_data = reference_table[:, 1:].T

    aa_eq(data_int.flatten(), cpp_data.flatten())
def test_vs_cpptraj(self):
    """Compare integer DSSP datasets of the Tc5b trajectory with cpptraj output.

    The trajectory is opened with ``pt.iterload`` from the files resolved
    by ``fn``.  Only datasets whose ``dtype`` is ``'integer'`` are kept.
    The reference file is read with its first row skipped and its first
    column dropped, then transposed before the flattened comparison.
    """
    trajectory = pt.iterload(fn('Tc5b.x'), fn('Tc5b.top'))
    datasets = pt.dssp(trajectory, "*", dtype='cpptraj_dataset')

    integer_rows = []
    for dataset in datasets:
        if dataset.dtype == 'integer':
            integer_rows.append(dataset.values)
    data_int = np.array(integer_rows, dtype='i4')

    # load cpptraj output
    reference_table = np.loadtxt(fn("dssp.Tc5b.dat"), skiprows=1)
    cpp_data = reference_table[:, 1:].T

    aa_eq(data_int.flatten(), cpp_data.flatten())
def calculate_angles(traj, residues=None, angles='phi psi'):
    """Compute backbone/side-chain dihedrals with ``pt.multidihedral``.

    Parameters
    ----------
    traj
        Trajectory object passed straight to pytraj.
    residues
        Residue selection forwarded as ``resrange``.  When empty or ``None``
        the residue numbers are derived from the labels returned by
        ``pt.dssp(traj)[0]``.
    angles
        Dihedral types forwarded as ``dihedral_types``.

    Returns
    -------
    A plain ``list`` of the datasets when ``residues`` is given, otherwise
    the object returned by ``pt.multidihedral`` unchanged (this asymmetry
    is inherited from the original implementation and kept for callers).
    """
    # ``None`` instead of ``[]`` avoids a shared mutable default argument.
    if residues:
        return list(
            pt.multidihedral(traj, dihedral_types=angles, resrange=residues))

    # get all residue numbers: everything after the first four characters
    # of each label (presumably labels look like "ALA:12" -- TODO confirm).
    residues = [int(label[4:]) for label in pt.dssp(traj)[0]]
    return pt.multidihedral(traj, dihedral_types=angles, resrange=residues)
def set_resrange(residues, traj):
    """Turn a residue selection string into a list of residue numbers.

    * empty selection -> every residue number found in the labels returned
      by ``pt.dssp(traj)[0]`` (text after the first four characters);
    * ``"a-b"``       -> ``a, a+1, ..., b-1`` (the upper bound is exclusive
      because ``np.arange`` is used);
    * ``"a,b,c"``     -> ``[a, b, c]``.

    The raw selection is echoed to stdout before it is parsed.
    """
    print("Selected residues:", residues)

    # use all residues in topology
    if not residues:
        return [int(label[4:]) for label in list(pt.dssp(traj)[0])]

    # range of residues
    if '-' in residues:
        first, last = residues.split('-')
        return list(np.arange(int(first), int(last)))

    # list of residues
    return [int(token) for token in residues.split(',')]
def test_frame_indices(self):
    """DSSP restricted via ``frame_indices`` must equal slicing the full result.

    Runs ``pt.dssp`` on the whole Tc5b trajectory and again with
    ``frame_indices=[0, 2, 5]``, then checks that the second element of
    both results agrees on those frames.
    """
    from numpy.testing import assert_equal

    wanted = (0, 2, 5)
    trajectory = pt.iterload(fn('Tc5b.x'), fn('Tc5b.top'))

    every_frame = pt.dssp(trajectory)[1]
    selected = pt.dssp(trajectory, frame_indices=list(wanted))[1]

    # A fresh list is used for the fancy index so both uses stay independent.
    assert_equal(every_frame[list(wanted)], selected)
def compare_traj2PDB(traj, ref_pdb, tot_res, dih, residue_array=(1, 0)):
    '''
    Tabulate one dihedral type per residue for a trajectory and for a
    reference PDB, write the values to "<dih>_values.dat", and compute the
    per-frame angular deviation from the reference.

    traj is your pytraj trajectory to compare to the pdb structure,
        e.g. traj = pt.iterload([crd], top=top)
    ref_pdb is the reference pdb path, e.g. ./PDB/1hhp.pdb
    tot_res is the total number of residues
    dih is the dihedral name; 'phi' and 'psi' get terminal-residue handling,
        any other name is treated as a side-chain (chi) dihedral
    residue_array holds the minimum and maximum residue number; a maximum
        of 0 (the default) means "up to tot_res"

    Side effect: writes "<dih>_values.dat" in the current directory.
    Returns None (as in the original implementation).
    '''
    r0 = residue_array[0]  # the lowest residue
    # the highest residue defaults to the total number of residues
    r1 = tot_res if residue_array[1] == 0 else residue_array[1]

    # reference structure; load it once and reuse it for every residue
    ref_traj = pt.load(ref_pdb)
    traj_len = len(traj)

    # frames on the index (1-based), one column per residue dihedral
    data_df = pd.DataFrame(index=np.arange(1, traj_len + 1, 1))
    # dataframe that holds the reference dihedral values
    data_df_ref = pd.DataFrame()

    columns = []  # column labels, reused below for the deviation table
    for k in np.arange(r0, r1 + 1, 1):
        # cheap way to get the residue name: first three characters of the
        # label pt.dssp returns for the single-residue mask
        residues, _, _ = pt.dssp(ref_traj, ":%s" % (k))
        res = residues[0][0:3]
        column = '%s%s_%s' % (res, k, dih)
        columns.append(column)

        # no phi for the first residue and no psi for the last one.
        # (The original tested dih == 'phi' for the last residue, which made
        # that branch unreachable and sent psi of the last residue down the
        # chi path.)
        undefined = (dih == 'phi' and k == 1) or (dih == 'psi' and k == tot_res)
        if undefined:
            data_df[column] = "Nan"
            data_df_ref[column] = "Nan"
            continue

        # mask for this dihedral, then its value in every trajectory frame
        indx = '%s' % (dihmask(int(k), dih, res))
        data1 = cal_dih(traj, indx)

        # only side-chain dihedrals are allowed to be missing for a residue
        is_backbone = dih in ('phi', 'psi')
        if not is_backbone and len(data1) == 0:
            data_df[column] = "Nan"
            data_df_ref[column] = "Nan"
            continue

        data_df[column] = data1
        data_df_ref[column] = cal_dih(ref_traj, indx)

    # stack trajectory rows and the reference row; DataFrame.append was
    # removed in pandas 2.0, pd.concat is the equivalent call
    dih_val = pd.concat([data_df, data_df_ref])
    dih_val.index.name = "Frame"
    # the reference row carries index 0; label it 'ref'
    dih_val.rename(index={0: 'ref'}, inplace=True)
    # write out dihedral values to 4 decimal places (to_csv creates the file,
    # so no separate shell "touch" is needed)
    dih_val.to_csv("%s_values.dat" % (dih), float_format='%.4f')

    # deviation of every frame from the reference value
    dev_df = pd.DataFrame(index=np.arange(1, traj_len + 1, 1))
    for column in columns:
        val1 = data_df_ref[column].values  # reference value(s) for this residue
        diff = []
        for val in data_df[column].values:
            if val != 'Nan':
                if np.sign(val) == np.sign(val1):
                    # same sign: plain difference of magnitudes
                    same_side = abs(abs(val) - abs(val1))
                    diff.append(same_side[0])
                else:
                    # opposite signs: take the shorter way round the
                    # -180..180 circle (through 180 or through 0)
                    through_180 = (180 - abs(val)) + (180 - abs(val1))
                    through_0 = (abs(val) - 0) + (abs(val1 - 0))
                    if through_180 <= through_0:
                        diff.append(through_180[0])
                    else:
                        diff.append(through_0[0])
            else:
                diff.append('nan')
        dev_df[column] = diff
    # NOTE(review): dev_df is neither written nor returned in the code visible
    # here -- confirm whether the function was meant to return or save it.
def create_PDBdf(pdb_list, tot_res):
    '''
    Build one dihedral table per PDB entry and save each as CSV.

    pdb_list is anything np.genfromtxt can read as comma-delimited text,
        e.g. a list such as ['2qnp', '1hhp'] or a file of comma-separated
        ids. The structures must be stored in a directory named PDB.
    tot_res is the total number of residues

    Returns a dictionary keyed by lower-cased pdb name, e.g. pdb['1hhp'],
    whose values are dataframes with residue number, residue name, phi, psi
    and chi1..chi5. Each dataframe is also written to
    ./reference_values/<name>_ref.dat.
    '''
    pdb = {}  # pdb name -> dataframe
    chis = ['chi1', 'chi2', 'chi3', 'chi4', 'chi5']

    # genfromtxt squeezes a single entry to a 0-d array (not iterable) and
    # returns a 2-D array for multi-row input; normalise to a flat 1-D array
    list_pdb = np.atleast_1d(
        np.genfromtxt(pdb_list, dtype=str, delimiter=',')).ravel()

    for entry in list_pdb:
        name = entry.lower().strip()
        path = 'PDB/%s.pdb' % (name)
        traj = pt.iterload(path)

        # NOTE(review): the stop value is exclusive, so residue number tot_res
        # itself is never processed and the "k != tot_res" guard below can
        # never be False -- confirm whether the last residue should be included.
        res_numbers = np.arange(1, tot_res, 1)

        # one row per residue: number, name, and the dihedrals up to chi5
        data_df = pd.DataFrame(index=res_numbers,
                               columns=["res#", "resname", "phi", "psi",
                                        "chi1", "chi2", "chi3", "chi4", "chi5"])
        data_df['res#'] = res_numbers

        for k in res_numbers:
            # cheap way to get the residue name: first three characters of
            # the label pt.dssp returns for the single-residue mask
            residues, _, _ = pt.dssp(traj, ":%s" % (k))
            res = residues[0][0:3]
            data_df.at[k, 'resname'] = '%s' % (res)

            if k != 1:  # no phi for first residue
                indx = '%s' % (dihmask(int(k), 'phi', res))
                data_df.at[k, 'phi'] = cal_dih(traj, indx)[0]

            if k != tot_res:  # no psi for last residue
                indx = '%s' % (dihmask(int(k), 'psi', res))
                data_df.at[k, 'psi'] = pt.dihedral(traj, indx)[0]

            for chi in chis:
                indx = '%s' % (dihmask(int(k), chi, res))
                # dihmask signals a missing side-chain dihedral with 'empty'
                if indx != 'empty':
                    data_df.at[k, chi] = pt.dihedral(traj, indx)[0]

        # directory for the reference values; exist_ok avoids the error a
        # repeated shell "mkdir" produced on every entry after the first
        os.makedirs("reference_values", exist_ok=True)
        data_df.to_csv("./reference_values/%s_ref.dat" % (name),
                       float_format='%.4f')
        pdb[name] = data_df

    return pdb
def test_frame_indices(self):
    """DSSP restricted via ``frame_indices`` must equal slicing the full result.

    Runs ``pt.dssp`` on ``self.traj`` with and without
    ``frame_indices=[0, 2, 5]`` and checks that the second element of both
    results agrees on those frames.
    """
    from numpy.testing import assert_equal

    wanted = (0, 2, 5)

    every_frame = pt.dssp(self.traj)[1]
    selected = pt.dssp(self.traj, frame_indices=list(wanted))[1]

    # A fresh list is used for the fancy index so both uses stay independent.
    assert_equal(every_frame[list(wanted)], selected)
# Minimal example: load entry "1l2y" through pytraj's RCSB loader, run
# DSSP on it, and print whatever pt.dssp returns.
import pytraj as pt

pdb = pt.load_pdb_rcsb("1l2y")
out = pt.dssp(pdb)
print(out)