Beispiel #1
0
 def test_simplified_codes(self):
     """Check the simplified DSSP codes of the first row for PDB entry 1l2y."""
     structure = pt.fetch_pdb('1l2y')
     # The non-simplified assignment is computed too; its value is not used.
     pt.dssp(structure)[1]
     simplified = pt.dssp(structure, simplified=True)[1]
     # One character per expected code: C, 7xH, 2xC, 4xH, 6xC.
     wanted = list('CHHHHHHHCCHHHHCCCCCC')
     observed = simplified[0].tolist()
     assert wanted == observed, 'test_simplified_codes: must equal'
Beispiel #2
0
 def test_simplified_codes(self):
     """Check the simplified DSSP codes of the first row for the local 1L2Y.pdb."""
     traj = pt.load(fn("1L2Y.pdb"))
     # Run the default (non-simplified) assignment as well; result is discarded.
     pt.dssp(traj)[1]
     first_row = pt.dssp(traj, simplified=True)[1][0]
     expected_1st = ['C'] + ['H'] * 7 + ['C'] * 2 + ['H'] * 4 + ['C'] * 6
     assert expected_1st == first_row.tolist(), \
         'test_simplified_codes: must equal'
Beispiel #3
0
 def test_simplified_codes(self):
     """Check the simplified DSSP codes of the first row for PDB entry 1l2y."""
     traj = pt.fetch_pdb('1l2y')
     # The default assignment is also evaluated; only the simplified one is
     # compared below.
     pt.dssp(traj)[1]
     simplified_codes = pt.dssp(traj, simplified=True)[1]
     # Expected codes written as (code, run length) pairs.
     runs = (('C', 1), ('H', 7), ('C', 2), ('H', 4), ('C', 6))
     expected_1st = [code for code, count in runs for _ in range(count)]
     got = simplified_codes[0].tolist()
     assert expected_1st == got, 'test_simplified_codes: must equal'
Beispiel #4
0
 def test_vs_cpptraj(self):
     """Compare the integer DSSP datasets with the saved cpptraj output file."""
     datasets = pt.dssp(self.traj, "*", dtype='cpptraj_dataset')
     # Keep only the datasets whose dtype is reported as 'integer'.
     integer_rows = []
     for dset in datasets:
         if dset.dtype == 'integer':
             integer_rows.append(dset.values)
     computed = np.array(integer_rows, dtype='i4')
     # Reference table: skip the header row, drop the first column, transpose.
     table = np.loadtxt("./data/dssp.Tc5b.dat", skiprows=1)
     reference = table[:, 1:].T
     aa_eq(computed.flatten(), reference.flatten())
Beispiel #5
0
 def test_vs_cpptraj(self):
     """Compare the integer DSSP datasets of Tc5b with the saved cpptraj output."""
     trajectory = pt.iterload(fn('Tc5b.x'), fn('Tc5b.top'))
     all_sets = pt.dssp(trajectory, "*", dtype='cpptraj_dataset')
     # Only datasets flagged as 'integer' take part in the comparison.
     int_values = [ds.values for ds in all_sets if ds.dtype == 'integer']
     ours = np.array(int_values, dtype='i4').flatten()
     # Reference file: header row skipped, first column dropped, transposed.
     raw = np.loadtxt(fn("dssp.Tc5b.dat"), skiprows=1)
     theirs = raw[:, 1:].T.flatten()
     aa_eq(ours, theirs)
Beispiel #6
0
 def test_vs_cpptraj(self):
     """Check pytraj's integer DSSP datasets against the stored cpptraj table."""
     result = pt.dssp(self.traj, "*", dtype='cpptraj_dataset')
     # Select the values of every dataset whose dtype equals 'integer'.
     selected = (entry.values for entry in result if entry.dtype == 'integer')
     data_int = np.array(list(selected), dtype='i4')
     # Load the cpptraj output: no header row, no first column, transposed.
     cpp_table = np.loadtxt("./data/dssp.Tc5b.dat", skiprows=1)
     cpp_data = cpp_table[:, 1:]
     cpp_data = cpp_data.T
     aa_eq(data_int.flatten(), cpp_data.flatten())
Beispiel #7
0
def calculate_angles(traj, residues=None, angles='phi psi'):
    """Compute dihedral angles with ``pt.multidihedral``.

    Parameters
    ----------
    traj
        Trajectory object handed to pytraj.
    residues
        Residue selection passed as ``resrange``. When it is empty or not
        given, the residue numbers are taken from the labels returned by
        ``pt.dssp(traj)[0]`` (integer parsed from character 4 onward of each
        label).
    angles
        Dihedral types string passed as ``dihedral_types``.

    Returns
    -------
    A ``list`` of the ``pt.multidihedral`` results when ``residues`` was
    supplied; the raw ``pt.multidihedral`` return value otherwise. This
    asymmetry is kept for backward compatibility.
    """
    if residues:
        return list(
            pt.multidihedral(traj, dihedral_types=angles, resrange=residues))

    # No selection given: derive every residue number from the DSSP labels.
    residues = [int(label[4:]) for label in pt.dssp(traj)[0]]
    return pt.multidihedral(traj, dihedral_types=angles, resrange=residues)
Beispiel #8
0
def set_resrange(residues, traj):
    """Turn a residue selection string into a list of residue numbers.

    * empty selection: every residue number parsed from the labels returned
      by ``pt.dssp(traj)[0]`` (integer from character 4 onward);
    * ``"start-end"``: the numbers produced by ``np.arange(start, end)``,
      i.e. ``end`` itself is not included;
    * anything else: the comma-separated integers of the string.

    The selection is echoed to stdout before it is parsed.
    """
    print("Selected residues:", residues)

    # Nothing selected: fall back to all residues known to the topology.
    if not residues:
        return [int(label[4:]) for label in pt.dssp(traj)[0]]

    # Range notation "start-end".
    if '-' in residues:
        first, last = residues.split('-')
        return list(np.arange(int(first), int(last)))

    # Explicit comma-separated list.
    return [int(token) for token in residues.split(',')]
Beispiel #9
0
 def test_frame_indices(self):
     """DSSP on selected frames equals the same rows of the full result."""
     from numpy.testing import assert_equal
     picked = (0, 2, 5)
     traj = pt.iterload(fn('Tc5b.x'), fn('Tc5b.top'))
     full_result = pt.dssp(traj)[1]
     partial_result = pt.dssp(traj, frame_indices=list(picked))[1]
     assert_equal(full_result[list(picked)], partial_result)
Beispiel #10
0
def _dihedral_deviation(val, val1):
    """Return the deviation between one angle and the reference value array.

    ``val`` is a single angle and ``val1`` the array of reference values; the
    first element of the resulting array is returned. Angles are treated as
    lying on the -180..180 range, so for opposite signs the shorter of the two
    ways round (via 180 or via 0) is used.
    """
    if np.sign(val) == np.sign(val1):
        # Same sign: plain absolute difference.
        return abs(abs(val) - abs(val1))[0]
    # Opposite signs: distance going through +/-180 versus going through 0.
    via_180 = (180 - abs(val)) + (180 - abs(val1))
    via_0 = (abs(val) - 0) + (abs(val1 - 0))
    if via_180 <= via_0:
        return via_180[0]
    return via_0[0]


def compare_traj2PDB(traj, ref_pdb, tot_res, dih, residue_array=(1, 0)):
    '''
    Tabulate one dihedral type per residue for a trajectory and a reference
    PDB, write the values to "<dih>_values.dat", and compute the per-frame
    deviation from the reference.

    traj is your pytraj trajectory to compare to the pdb structure,
     generated in the python script as traj = pt.iterload([crd], top=top)
    ref_pdb is the pdb path, e.g. ./PDB/1hhp.pdb
    tot_res is the total number of residues
    dih is the dihedral name: 'phi', 'psi', or anything else, which is treated
     as a chi dihedral and passed on to dihmask
    residue_array holds the minimum and maximum residue number; when the
     maximum is 0 (the default) residues from the minimum up to tot_res are
     used

    Columns are named "<resname><resnum>_<dih>". Dihedrals that do not exist
    (phi of residue 1, psi of residue tot_res, a missing chi) are stored as
    the string "Nan".

    NOTE(review): the deviation table is built but neither returned nor
    written in the code visible here; the function may continue beyond this
    excerpt -- confirm against the full source.
    '''
    first_res = residue_array[0]
    # A maximum of 0 means "up to the last residue".
    last_res = tot_res if residue_array[1] == 0 else residue_array[1]
    res_numbers = np.arange(first_res, last_res + 1, 1)

    # Reference structure; if it should be a trajectory frame, export that
    # frame to a pdb with cpptraj first.
    ref_traj = pt.load(ref_pdb)
    traj_len = len(traj)
    # Frames (1-based) on the index, one column per residue dihedral.
    data_df = pd.DataFrame(index=np.arange(1, traj_len + 1, 1))
    # Reference dihedral values.
    data_df_ref = pd.DataFrame()

    # Column names in residue order, reused by the deviation pass below so the
    # DSSP lookup is done once per residue.
    columns = []
    for k in res_numbers:
        # DSSP is only used here as a cheap way to obtain the residue label;
        # its first three characters are the residue name.
        residues, _, _ = pt.dssp(ref_traj, ":%s" % (k))
        res = residues[0][0:3]
        column = '%s%s_%s' % (res, k, dih)
        columns.append(column)

        # No phi for the first residue and no psi for the last one.
        undefined = (dih == 'phi' and k == 1) or (dih == 'psi' and k == tot_res)
        if undefined:
            data_df[column] = "Nan"
            data_df_ref[column] = "Nan"
            continue

        indx = '%s' % (dihmask(int(k), dih, res))
        data1 = cal_dih(traj, indx)
        # Only chi dihedrals are checked for emptiness: an empty result means
        # this residue has no such chi.
        if dih not in ('phi', 'psi') and len(data1) == 0:
            data_df[column] = "Nan"
            data_df_ref[column] = "Nan"
            continue

        data_df[column] = data1
        data_df_ref[column] = cal_dih(ref_traj, indx)

    # Stack trajectory rows and reference rows (DataFrame.append no longer
    # exists in pandas 2.x).
    dih_val = pd.concat([data_df, data_df_ref])
    dih_val.index.name = "Frame"
    # The reference row carries index 0; label it 'ref'.
    dih_val.rename(index={0: 'ref'}, inplace=True)
    # Write the dihedral values with 4 decimal places.
    dih_val.to_csv("%s_values.dat" % (dih), float_format='%.4f')

    # Deviation of every frame from the reference value, per residue.
    dev_df = pd.DataFrame(index=np.arange(1, traj_len + 1, 1))
    for column in columns:
        val1 = data_df_ref[column].values
        diff = []
        for val in data_df[column].values:
            # Trajectory and reference are "Nan" together, so one check is
            # enough.
            if val != 'Nan':
                diff.append(_dihedral_deviation(val, val1))
            else:
                diff.append('nan')
        dev_df[column] = diff
Beispiel #11
0
def create_PDBdf(pdb_list, tot_res):
    '''
    Build one dihedral table per PDB entry and save each as a reference file.

    pdb_list is handed to np.genfromtxt (comma delimiter, string dtype) and
     yields the pdb names, e.g. '2qnp', '1hhp'. The matching files must be
     stored as PDB/<name>.pdb (names are lower-cased and stripped first).
    tot_res is the total number of residues.

    Returns a dictionary keyed by pdb name, e.g. pdb['1hhp'], whose values are
    DataFrames with the columns res#, resname, phi, psi and chi1..chi5. Each
    table is also written to ./reference_values/<name>_ref.dat.

    NOTE(review): rows are generated with np.arange(1, tot_res), so residue
    tot_res itself is not included and the "no psi for last residue" check
    never triggers -- confirm whether callers pass the residue count or
    count + 1.
    '''
    pdb = {}
    # genfromtxt gives a 0-d array for a single entry, which cannot be
    # iterated; atleast_1d makes one-entry input behave like a list.
    list_pdb = np.atleast_1d(np.genfromtxt(pdb_list, dtype=str, delimiter=','))
    columns = ["res#", "resname", "phi", "psi",
               "chi1", "chi2", "chi3", "chi4", "chi5"]
    chis = ['chi1', 'chi2', 'chi3', 'chi4', 'chi5']

    for i in list_pdb:
        i = i.lower().strip()
        j = 'PDB/%s.pdb' % (i)
        traj = pt.iterload(j)

        res_numbers = np.arange(1, tot_res, 1)
        data_df = pd.DataFrame(index=res_numbers, columns=columns)
        data_df['res#'] = res_numbers

        for k in res_numbers:
            # DSSP is only used as a cheap way to get the residue label; its
            # first three characters are the residue name.
            residues, _, _ = pt.dssp(traj, ":%s" % (k))
            res = residues[0][0:3]
            data_df.at[k, 'resname'] = '%s' % (res)

            # No phi for the first residue.
            if k != 1:
                indx = '%s' % (dihmask(int(k), 'phi', res))
                data = cal_dih(traj, indx)
                data_df.at[k, 'phi'] = data[0]

            # No psi for the last residue.
            if k != tot_res:
                indx = '%s' % (dihmask(int(k), 'psi', res))
                data = pt.dihedral(traj, indx)
                data_df.at[k, 'psi'] = data[0]

            # Side-chain dihedrals; dihmask answers 'empty' when the residue
            # has no such chi.
            for chi in chis:
                indx = '%s' % (dihmask(int(k), chi, res))
                if indx != 'empty':
                    data = pt.dihedral(traj, indx)
                    data_df.at[k, chi] = data[0]

        # Create the output directory without failing when it already exists.
        os.makedirs("reference_values", exist_ok=True)
        data_df.to_csv("./reference_values/%s_ref.dat" % (i),
                       float_format='%.4f')
        pdb[i] = data_df
    return pdb
Beispiel #12
0
 def test_frame_indices(self):
     """Restricting DSSP to frames 0, 2 and 5 matches slicing the full result."""
     from numpy.testing import assert_equal
     wanted_frames = [0, 2, 5]
     everything = pt.dssp(self.traj)[1]
     only_wanted = pt.dssp(self.traj, frame_indices=list(wanted_frames))[1]
     assert_equal(everything[wanted_frames], only_wanted)
import pytraj as pt

# Load PDB entry 1l2y through pt.load_pdb_rcsb
# (presumably fetched from the RCSB server, so network access is needed -- verify).
pdb = pt.load_pdb_rcsb("1l2y")

# Run DSSP with default arguments and print the return value unchanged.
out = pt.dssp(pdb)
print(out)
Beispiel #14
0
 def test_frame_indices(self):
     """The frame_indices option must give the same rows as indexing afterwards."""
     from numpy.testing import assert_equal
     baseline = pt.dssp(self.traj)[1]
     expected_rows = baseline[[0, 2, 5]]
     selected_rows = pt.dssp(self.traj, frame_indices=[0, 2, 5])[1]
     assert_equal(expected_rows, selected_rows)