def box_of_element(element, spacing,
                   xmin, xmax, ymin, ymax, zmin, zmax, outfile):
    """Fill a box with a regular grid of dummy atoms and write it as a PDB.

    Args:
        element: Value used for both ``atom_name`` and ``element_symbol``.
        spacing: Grid step along every axis (converted with ``float``).
        xmin, xmax, ymin, ymax, zmin, zmax: Box bounds (converted with
            ``float``). They are passed to ``np.arange``, so the upper
            bounds are exclusive.
        outfile: Path handed to ``PandasPdb.to_pdb``.

    Returns:
        The ATOM DataFrame that was written to ``outfile``.
    """
    # The builtin ``float`` replaces the ``np.float`` alias, which newer
    # NumPy releases no longer provide.
    step = float(spacing)
    xs = np.arange(float(xmin), float(xmax), step)
    ys = np.arange(float(ymin), float(ymax), step)
    zs = np.arange(float(zmin), float(zmax), step)

    xx, yy, zz = np.meshgrid(xs, ys, zs)
    coords = np.stack([xx.flatten(), yy.flatten(), zz.flatten()], axis=1)
    n_atoms = coords.shape[0]  # renamed from ``len`` to keep the builtin usable

    df = pd.DataFrame(data={
        'line_idx': np.arange(n_atoms),
        'record_name': ['ATOM'] * n_atoms,
        'atom_number': np.arange(1, n_atoms + 1),
        'blank_1': [' '] * n_atoms,
        'atom_name': [element] * n_atoms,
        'alt_loc': [' '] * n_atoms,
        'residue_name': ['DUM'] * n_atoms,
        'blank_2': [' '] * n_atoms,
        'chain_id': ['X'] * n_atoms,
        'residue_number': [1] * n_atoms,
        'insertion': [' '] * n_atoms,
        'blank_3': [' '] * n_atoms,
        'x_coord': coords[:, 0],
        'y_coord': coords[:, 1],
        'z_coord': coords[:, 2],
        'occupancy': [0.50] * n_atoms,
        'b_factor': [35.88] * n_atoms,
        'blank_4': [' '] * n_atoms,
        'segment_id': ['X1'] * n_atoms,
        'element_symbol': [element] * n_atoms,
        'charge': [0.0] * n_atoms,
    })

    pdb = PandasPdb()
    pdb.df['ATOM'] = df
    pdb.to_pdb(path=outfile, records=['ATOM'], gz=False, append_newline=True)
    return df
def test_multichain():
    """Check that amino3to1 reports chains A and B of 5mtn in order."""
    data_path = os.path.join(os.path.dirname(__file__), 'data',
                             '5mtn_multichain.pdb')
    mtn = PandasPdb()
    mtn.read_pdb(data_path)

    # Expected one-letter residues per chain, spelled as strings and
    # expanded into lists of single characters (88 and 94 residues).
    expect_res_a = list(
        'SLEPEPWFFKNLSRKDAERQLLAPGNTHGSFLIRESESTAGSFSLSVRDF'
        'DQGEVVKHYKIRNLDNGGFYISPRITFPGLHELVRHYT')
    expect_res_b = list(
        'SVSSVPTKLEVVAATPTSLLISWDAPAVTVVYYLITYGETGSPWPGGQAF'
        'EVPGSKSTATISGLKPGVDYTITVYAHRSSYGYSENPISINYRT')
    expect_chain = ['A'] * 88 + ['B'] * 94

    transl = mtn.amino3to1()
    chain_ids = transl['chain_id']
    got_chain = list(chain_ids.values)
    got_res_a = list(transl.loc[chain_ids == 'A', 'residue_name'].values)
    got_res_b = list(transl.loc[chain_ids == 'B', 'residue_name'].values)

    assert expect_chain == got_chain
    assert expect_res_a == got_res_a
    assert expect_res_b == got_res_b
def test_rna_and_nonmatching_indices():
    """Check the RMSD between residues 64 and 66 of the RNA test file."""
    atom_df = PandasPdb().read_pdb(TESTDATA_rna).df['ATOM']
    residue_64 = atom_df[atom_df['residue_number'] == 64]
    residue_66 = atom_df[atom_df['residue_number'] == 66]
    r = PandasPdb.rmsd(residue_64, residue_66)
    assert r == 10.2007, r
def get_lig_name(PDB, lig_list):
    """Select the HETATM residue with the most atoms and list all ligands.

    Reads ``PDB + '.pdb'`` and writes two files in the working directory:
    ``ligand_name.txt`` (the selected residue name) and ``ligand_list.txt``
    (every HETATM residue name except ``HOH``, each written as
    ``'resname <name> or '``).

    Args:
        PDB: Path of the PDB file without the ``.pdb`` extension.
        lig_list: Object whose ``'Lig_name'`` entry holds residue names
            that must never be selected.

    Returns:
        The selected residue name, or ``''`` when every HETATM residue is
        excluded.
    """
    ppdb = PandasPdb()
    ppdb.read_pdb(PDB + '.pdb')
    hetatm_names = ppdb.df['HETATM']['residue_name']

    # Built once instead of on every loop iteration.
    excluded = set(lig_list['Lig_name'])

    all_ligands = []
    _lig_name = ''
    most_atoms = 0  # atom count of the best candidate seen so far
    # Sorted so the result does not depend on set iteration order.
    for name in sorted(set(hetatm_names)):
        all_ligands.append(name)
        if name in excluded:
            continue
        atom_count = (hetatm_names == name).values.sum()
        # The original compared a misspelled, never-updated counter here,
        # so the first candidate always won regardless of size.
        if atom_count >= most_atoms:
            _lig_name = name
            most_atoms = atom_count

    # Water is not a restrained ligand; it may also simply be absent.
    if 'HOH' in all_ligands:
        all_ligands.remove('HOH')

    with open('ligand_name.txt', 'w') as file:
        file.write(_lig_name)
    with open('ligand_list.txt', 'w') as file:
        for name in all_ligands:
            file.write('resname ' + str(name) + ' or ')
    return _lig_name
def output_centers(centers, element, out_file, xlabel=10, ylabel=11, zlabel=12):
    """Write one dummy ATOM record per row of ``centers`` to a PDB file.

    Args:
        centers: DataFrame holding the coordinates.
        element: Value used for both ``atom_name`` and ``element_symbol``.
        out_file: Path handed to ``PandasPdb.to_pdb``.
        xlabel, ylabel, zlabel: Column labels in ``centers`` of the x, y
            and z coordinates.

    Returns:
        The ATOM DataFrame that was written.
    """
    n_rows = len(centers.index)

    def repeated(value):
        """Return ``value`` repeated once per output row."""
        return [value] * n_rows

    def coordinate(label):
        """Return the column ``label`` of ``centers`` as a flat array."""
        return centers[[label]].to_numpy().reshape((n_rows,))

    df_data = {
        'line_idx': np.arange(n_rows),
        'record_name': repeated('ATOM'),
        'atom_number': np.arange(1, n_rows + 1),
        'blank_1': repeated(' '),
        'atom_name': repeated(element),
        'alt_loc': repeated(' '),
        'residue_name': repeated('DUM'),
        'blank_2': repeated(' '),
        'chain_id': repeated('X'),
        'residue_number': repeated(1),
        'insertion': repeated(' '),
        'blank_3': repeated(' '),
        'x_coord': coordinate(xlabel),
        'y_coord': coordinate(ylabel),
        'z_coord': coordinate(zlabel),
        'occupancy': repeated(0.50),
        'b_factor': repeated(35.88),
        'blank_4': repeated(' '),
        'segment_id': repeated('X1'),
        'element_symbol': repeated(element),
        'charge': repeated(0.0),
    }
    print(df_data)

    atom_df = pd.DataFrame(data=df_data)
    writer = PandasPdb()
    writer.df['ATOM'] = atom_df
    writer.to_pdb(path=out_file, records=['ATOM'], gz=False,
                  append_newline=True)
    return atom_df
def main(args):
    """Load a structure and set the module globals describing it.

    Sets the globals ``model_name``, ``resolution`` and ``header``.

    Args:
        args: Object with ``input_type`` (``'pdb_id'`` or ``'structure'``)
            and ``input`` (a PDB id to fetch, or a path to read).

    Raises:
        ValueError: If ``args.input_type`` is neither supported value.
    """
    global model_name, resolution, header

    if args.input_type == 'pdb_id':
        struct = PandasPdb().fetch_pdb(args.input)
        model_name = args.input
    elif args.input_type == 'structure':
        struct = PandasPdb()
        struct = struct.read_pdb(args.input)
        model_name = re.search(r'[\d\w]+$', struct.header).group()
    else:
        # Previously this fell through with ``struct`` unbound.
        raise ValueError(
            "unsupported input_type: {!r}".format(args.input_type))

    # Fall back to 100 when the file has no REMARK 2 resolution line.
    resolution_match = re.search(
        r"REMARK\s+2\s+RESOLUTION\.\s+(\d\.\d+)", struct.pdb_text)
    resolution = float(resolution_match.group(1)) if resolution_match else 100

    # Fall back to the model name when no COMPND molecule line is present.
    header_match = re.search(
        r"COMPND\s+2\s+MOLECULE\:\s+(.+)\S+", struct.pdb_text)
    header = header_match.group(1) if header_match else model_name

    # NOTE(review): presumably ``filter`` is a project function shadowing
    # the builtin (the builtin cannot be called without arguments) - confirm.
    permition = filter()
def run_test(prot_dir, protein, data):
    """Run the permutation test for one protein and write its result CSV.

    Does nothing when ``SWISS-MODEL_Repository/<prot_dir>/swissmodel/`` is
    not a directory. Otherwise it loads the first file listed there, dumps
    the ATOM table and the protein's mutation table to disk, and, when the
    mutation frame has both positive and negative scores, writes the
    permutation result to ``parallelized/<protein>-pval.csv``.

    Any exception raised while processing is printed and swallowed, so one
    failing protein does not propagate to the caller.

    Args:
        prot_dir: Repository sub-directory; also matched against
            ``data['uniprot_repo']``.
        protein: Identifier used in the output CSV name.
        data: DataFrame with the columns read below (``uniprot_repo``,
            ``gene``, ``uniprot``, ``mutation``, ``effect_size``,
            ``p-value``, ``transition``).
    """
    results_df = pd.DataFrame(columns=[
        'gene_name', 'uniprot_ID', 'permutation risk', 'permutation prot'
    ])
    file_repo = 'SWISS-MODEL_Repository/' + prot_dir + '/swissmodel/'
    if not os.path.isdir(file_repo):
        return

    try:
        print(file_repo)
        pdb_file = file_repo + str(os.listdir(file_repo)[0])
        ppdb = PandasPdb().read_pdb(pdb_file)
        df = pd.DataFrame(ppdb.df['ATOM'])
        sequence = ppdb.amino3to1()

        protein_spec_df = data[data['uniprot_repo'] == prot_dir]
        gene_name = protein_spec_df['gene'].values[0]
        uniprot_ID = protein_spec_df['uniprot'].values[0]
        protein_spec_df = protein_spec_df[[
            'mutation', 'effect_size', 'p-value', 'transition'
        ]]

        df_write = "protein_structs/" + gene_name + '.csv'
        df.to_csv(df_write, header=None, index=None, sep='\t')
        write_to_dir = 'protein_mutation_locs_txts/' + gene_name + '.T2D.txt'
        protein_spec_df.to_csv(write_to_dir, header=None, index=None,
                               sep='\t')
        protein_df = pd.read_csv(write_to_dir, header=None, sep="\t")

        muts_df = tests.make_dataframe(df, protein_df, sequence)
        print(muts_df)

        has_risk = not muts_df[muts_df['score'] > 0].empty
        has_protective = not muts_df[muts_df['score'] < 0].empty
        if has_risk and has_protective:
            risk = tests.get_dist_vec(muts_df, True)
            prot = tests.get_dist_vec(muts_df, False)
            perm = tests.run_permutation(muts_df, df, np.mean(risk),
                                         np.mean(prot), 1000)
            print(perm)
            # ``DataFrame.append`` returned a new frame that was thrown
            # away, so the CSV never contained this row. Add it in place,
            # in the column order declared above.
            results_df.loc[len(results_df)] = [
                gene_name, uniprot_ID, perm[0], perm[1]
            ]
            out_csv = 'parallelized/' + str(protein) + '-pval.csv'
            results_df.to_csv(out_csv)
    except Exception as e:
        # Deliberate best-effort: report the failure and carry on.
        print(e)
def test_sameindex():
    """Check amino3to1 when every ATOM row shares the same index label."""
    data_path = os.path.join(os.path.dirname(__file__), 'data',
                             '1t48_995.pdb')
    p1t48 = PandasPdb()
    p1t48.read_pdb(data_path)
    print(p1t48)

    # Give every ATOM row the index label 0.
    n_atoms = p1t48.df['ATOM'].shape[0]
    p1t48.df['ATOM'].index = np.zeros(n_atoms, dtype=int)

    # Expected one-letter residues, spelled as a string and expanded into
    # a list of single characters.
    expect_res = list(
        'MEMEKEFEQIDKSGSWAAIYQDIRHEASDFPCRVAKLPKNKNRNRYRDVS'
        'PFDHSRIKLHQEDNDYINASLIKMEEAQRSYILTQGPLPNTCGHFWEMVW'
        'EQKSRGVVMLNRVMEKGSLK')

    transl = p1t48.amino3to1()
    expect_chain = ['A'] * transl.shape[0]
    got_chain = list(transl['chain_id'].values)
    got_res = list(transl['residue_name'].values)

    assert expect_chain == got_chain
    assert expect_res == got_res
def import_pdb_with_biopandas(fname, label=None):
    """Read a multi-frame PDB file into coordinates, times and energies.

    Prints the BioPandas citation, then parses ``fname``.

    Args:
        fname: Path of the PDB file.
        label: Unused; kept so existing callers keep working.

    Returns:
        A tuple ``(coords, time, energy)``: ``coords`` has shape
        ``(frames, atoms, 3)``; ``time`` and ``energy`` are float arrays
        parsed from the ``time=`` / ``energy=`` tokens of the OTHERS
        records.
    """
    import sys

    from biopandas.pdb import PandasPdb
    cite = '''
    @article{raschkas2017biopandas,
      doi = {10.21105/joss.00279},
      url = {http://dx.doi.org/10.21105/joss.00279},
      year  = {2017},
      month = {jun},
      publisher = {The Open Journal},
      volume = {2},
      number = {14},
      author = {Sebastian Raschka},
      title = {BioPandas: Working with molecular structures in pandas DataFrames},
      journal = {The Journal of Open Source Software}
    }
    '''
    print(cite)

    ppdb = PandasPdb()
    ppdb.read_pdb(fname)

    atom_df = ppdb.df['ATOM']
    properties = atom_df.dtypes.index
    # Column 1 is presumably the atom number: its maximum is used as the
    # number of atoms per frame - confirm against the biopandas schema.
    num_of_atoms = atom_df[properties[1]].max()
    frame_nums = atom_df[properties[1]].size // num_of_atoms
    # Columns 11..13 are reshaped into (frame, atom, xyz).
    coords = atom_df[properties[11:14]].to_numpy().reshape(
        (frame_nums, num_of_atoms, 3))

    others_df = ppdb.df['OTHERS']
    properties = others_df.dtypes.index
    entries = others_df[properties[1]][1::3].values
    # str(ndarray) abbreviates arrays above NumPy's print threshold with
    # '...', which silently dropped frames on long trajectories. Lifting
    # the threshold keeps the original token layout for any length.
    with np.printoptions(threshold=sys.maxsize):
        rows = str(entries).split()
    time = np.asarray([float(x.replace('time=', '')) for x in rows[2::8]])
    energy = np.asarray(
        [float(x.replace('energy=', '')) for x in rows[4::8]])
    return coords, time, energy
def test_pdb_with_insertion_codes():
    """Check residues 50-59 of the amino3to1 output for 2d7t."""
    pdb_path = os.path.join(os.path.dirname(__file__), 'data', '2d7t.pdb')
    sequence = PandasPdb().read_pdb(pdb_path).amino3to1()
    window = sequence[50:60]['residue_name'].values
    assert "".join(window) == 'INPKSGDTNY'
def test_read_pdb():
    """Test public read_pdb"""
    reader = PandasPdb()
    reader.read_pdb(TESTDATA_FILENAME)

    # Text, PDB code and source path must all reflect the file just read.
    assert reader.pdb_text == three_eiy
    assert reader.code == '3eiy', reader.code
    assert reader.pdb_path == TESTDATA_FILENAME
def parse_ligand_from_pdb(
        pdb_id, base_folder,
        ligands_to_remove_csv="~/Fraser_Lab/phenix_pipeline/ligands_to_remove.csv"):
    """
    Identifies drug-like ligands from PDB input file.

    Reads ``<base_folder>/<pdb_id>/<pdb_id>.pdb`` and returns a HETATM
    residue name that is not listed in the ligands-to-remove CSV. When
    several residues qualify, each one is printed and the alphabetically
    last one is returned.

    :param pdb_id: PDB identifier; used as both the sub-folder and the
        file stem.
    :param base_folder: Folder containing one sub-folder per PDB id.
    :param ligands_to_remove_csv: CSV whose first column lists residue
        names to ignore. Defaults to the previously hard-coded path.
    :return: The selected residue name, or ``''`` if none qualifies.
    """
    # Read PDB file into PandasPDB df
    ppdb = PandasPdb()
    ppdb.read_pdb("{}/{}/{}.pdb".format(base_folder, pdb_id, pdb_id))

    # Subset df to hetatms
    hetatm_df = ppdb.df['HETATM']

    # Read in ligands
    lig_to_remove_df = pd.read_csv(ligands_to_remove_csv)
    lig_to_remove_df.columns = ["name", "unknown"]
    # Built once instead of on every loop iteration.
    excluded = set(lig_to_remove_df['name'])

    # Sorted so the result does not depend on set iteration order.
    residue_names = sorted(set(hetatm_df['residue_name']))

    lig_name = ''
    for res_name in residue_names:
        if res_name not in excluded:
            print("###################################")
            lig_name = res_name
            print(lig_name)

    return lig_name
def pdb_atoms(filename):
    """Load the ATOM records of a PDB file with radii and coordinates.

    Returns:
        ``(df, radii, coords, n)``: the ATOM DataFrame, an array of
        ``get_vdwr`` values for each element from ``get_elem(df)``, the
        ``stat_items`` columns as an array, and the number of rows.
    """
    structure = PandasPdb()
    structure.read_pdb(filename)
    atom_df = structure.df['ATOM']

    elements = get_elem(atom_df)
    radii = np.array([get_vdwr(element) for element in elements])
    coords = atom_df.filter(items=stat_items).to_numpy()
    return atom_df, radii, coords, coords.shape[0]
def load_custom_pdb(filepath):
    '''
    Return the 'ATOM' table of a PDB file, indexed by its line index.
    '''
    reader = PandasPdb()
    reader.read_pdb(filepath)
    atom_table = reader.df["ATOM"]
    return atom_table.set_index(["line_idx"])
def show_info(self, selected):
    """Show the first 1000 characters of the selected PDB file.

    The file is read from ``self.folder`` and the preview is pushed to
    ``self.mol_info`` via ``set_text``.
    """
    pdb_path = self.folder + '/' + selected
    reader = PandasPdb()
    reader.read_pdb(pdb_path)
    preview = reader.pdb_text[:1000]
    self.mol_info.set_text('\nRaw PDB file contents:\n\n%s\n...' % preview)
def test_read_pdb_from_list():
    """Test public read_pdb_from_list"""
    cases = ((three_eiy, '3eiy'), (four_eiy, '4eiy'))
    for pdb_text, code in cases:
        reader = PandasPdb()
        # keepends=True so joining the lines reproduces the text exactly.
        lines = pdb_text.splitlines(True)
        reader.read_pdb_from_list(lines)

        assert reader.pdb_text == pdb_text
        assert reader.code == code
        assert reader.pdb_path == ''
def persist(self):
    """
    Save .npy files of the different averages and pdb files with the beta column set to importance

    Arrays go to ``<working_dir>/<extractor name>/``. The four importance
    arrays are always written; the remaining ones only when they are not
    None. PDB files are written only when ``self.pdb_file`` is set.

    :return: itself
    """
    directory = self.working_dir + "/{}/".format(self.extractor.name)
    if not os.path.exists(directory):
        os.makedirs(directory)

    def save(stem, values):
        """Write ``values`` to ``<directory><stem>`` with ``np.save``."""
        np.save(directory + stem, values)

    save("importance_per_residue", self.importance_per_residue)
    save("std_importance_per_residue", self.std_importance_per_residue)
    save("feature_importance", self.feature_importances)
    save("std_feature_importance", self.std_feature_importances)

    # The per-cluster arrays are saved only as a pair.
    if (self.importance_per_residue_and_cluster is not None
            and self.std_importance_per_residue_and_cluster is not None):
        save("importance_per_residue_and_cluster",
             self.importance_per_residue_and_cluster)
        save("std_importance_per_residue_and_cluster",
             self.std_importance_per_residue_and_cluster)

    # Optional results; each file stem equals the attribute name.
    optional_attributes = (
        'separation_score',
        'predefined_relevant_residues',
        'accuracy',
        'accuracy_per_cluster',
        'test_set_errors',
        'feature_to_resids',
    )
    for attribute in optional_attributes:
        values = getattr(self, attribute)
        if values is not None:
            save(attribute, values)

    if self.pdb_file is None:
        return self

    pdb = PandasPdb()
    pdb.read_pdb(self.pdb_file)
    self._save_to_pdb(
        pdb, directory + "importance.pdb",
        self._map_to_correct_residues(self.importance_per_residue))

    if self.importance_per_residue_and_cluster is not None:
        # Iterate over the transposed array: one importance vector per
        # cluster.
        per_cluster = self.importance_per_residue_and_cluster.T
        for cluster_idx, importance in enumerate(per_cluster):
            if self.extractor.label_names is None:
                cluster_name = "cluster_{}".format(cluster_idx)
            else:
                cluster_name = self.extractor.label_names[cluster_idx]
            self._save_to_pdb(
                pdb, directory + "{}_importance.pdb".format(cluster_name),
                self._map_to_correct_residues(importance))

    return self
def get_seq(struc):
    """Return the one-letter sequence(s) of a PDB structure.

    Returns:
        A single string when the structure has exactly one chain,
        otherwise a list with one string per chain id, in order of first
        appearance.
    """
    # amino3to1 yields the columns 'chain_id' and 'residue_name'.
    sequences = PandasPdb().read_pdb(struc).amino3to1()

    seqs = []
    for chain in sequences['chain_id'].unique():
        in_chain = sequences['chain_id'] == chain
        residues = sequences.loc[in_chain, 'residue_name'].to_list()
        seqs.append(''.join(residues))

    if len(seqs) == 1:
        return seqs[0]
    return seqs
def test_equal():
    """Check ATOM distances to a reference point against known values."""
    TESTDATA_1t48 = os.path.join(os.path.dirname(__file__), 'data',
                                 '1t48_995.pdb')
    p1t48 = PandasPdb()
    p1t48.read_pdb(TESTDATA_1t48)
    dist = p1t48.distance(xyz=(70.785, 15.477, 23.359), record='ATOM')
    expect = pd.Series([2.533259, 1.520502, 0.000000, 1.257597, 1.252510],
                       index=[12, 13, 14, 15, 16])

    close = dist[dist < 3]
    # The previous check compared ``.all()`` of both series, i.e. two
    # booleans; ``expect.all()`` is always False because ``expect``
    # contains 0.0, so the values were never compared. Compare the index
    # and the values (expected values are rounded to 6 decimals).
    assert list(close.index) == list(expect.index)
    assert (close - expect).abs().max() < 1e-6
def test_use_external_df():
    """Check distance() on a caller-supplied DataFrame."""
    TESTDATA_1t48 = os.path.join(os.path.dirname(__file__), 'data',
                                 '1t48_995.pdb')
    p1t48 = PandasPdb()
    p1t48.read_pdb(TESTDATA_1t48)
    # Use every ATOM row except the last one.
    new_df = p1t48.df['ATOM'].iloc[:-1, :].copy()
    dist = p1t48.distance(df=new_df, xyz=(70.785, 15.477, 23.359))
    expect = pd.Series([2.533259, 1.520502, 0.000000, 1.257597],
                       index=[12, 13, 14, 15])

    close = dist[dist < 3]
    # The previous check compared ``.all()`` of both series, i.e. two
    # booleans; ``expect.all()`` is always False because ``expect``
    # contains 0.0, so the values were never compared. Compare the index
    # and the values (expected values are rounded to 6 decimals).
    assert list(close.index) == list(expect.index)
    assert (close - expect).abs().max() < 1e-6
def expanded_bottleneck(src_file, trg_file, factor):
    """Scale a structure's in-plane projection and write the result as PDB.

    Takes the decomposition returned by ``proj_stats(src_file)``, scales
    ``proj_xy`` by ``factor``, leaves the third component unchanged, maps
    the result back through ``coords_vh``, re-adds ``mean`` and writes the
    new coordinates to ``trg_file``.

    Args:
        src_file: Input passed to ``proj_stats``.
        trg_file: Output PDB path.
        factor: Multiplier applied to ``proj_xy``.
    """
    (src_df, src_radii, n, mean, coords, coords_u, coords_s, coords_vh,
     proj_xy, src_plot_df) = proj_stats(src_file)

    expanded_proj = factor * proj_xy
    third_component = coords_s[2] * coords_u[:, 2]
    # ``np.concatenate(a, b)`` treated the second array as the ``axis``
    # argument and raised. Stack the third component as an extra column
    # instead. Assumes proj_xy is (n, 2), so the result is (n, 3) as the
    # matmul with coords_vh requires - TODO confirm against proj_stats.
    fat_ep = np.column_stack((expanded_proj, third_component))

    coords = np.matmul(fat_ep, coords_vh)
    coords += mean

    df_coords = pd.DataFrame(coords, columns=stat_items)
    trg_df = src_df.copy()
    # Assign positionally so a non-default index on src_df cannot
    # misalign the rows.
    trg_df[stat_items] = df_coords[stat_items].to_numpy()

    trg_pdb = PandasPdb()
    trg_pdb.df['ATOM'] = trg_df
    trg_pdb.to_pdb(path=trg_file, records=['ATOM'], gz=False,
                   append_newline=True)
def test_fetch_pdb():
    """Test fetch_pdb"""
    ppdb = PandasPdb()
    # Stays None when the download fails, so the check below is skipped
    # instead of raising NameError.
    txt = None
    try:
        txt = ppdb._fetch_pdb('3eiy')
    except HTTPError:
        pass
    if txt:  # skip if PDB down
        # This comparison previously had no ``assert`` and checked nothing.
        assert txt[:100] == three_eiy[:100]
        ppdb.fetch_pdb('3eiy')
        assert ppdb.pdb_text == txt
def get_neighbors(path, device):
    """Label every node of a structure with the residue of its nearest atom.

    Reads the PDB file at ``path`` and the matching ``.ply`` structure
    (looked up in ``./structures/train/`` first, then
    ``./structures/test/``). For each node position it finds the closest
    ATOM record and stores that atom's encoded residue in
    ``structure.residues``.

    Args:
        path: Path of the PDB file; its base name without extension is the
            molecule name used for the ``.ply`` lookup.
        device: Torch device on which the distances are computed.

    Returns:
        ``(train, structure)`` where ``train`` is True if the structure
        was found in the train folder and False if it came from the test
        folder.
    """
    res_encoder = {'LYS': 1, 'GLU': 2, 'ASP': 3, 'SER': 4, 'PHE': 5,
                   'CYS': 6, 'VAL': 7, 'ILE': 8, 'MET': 9, 'HIS': 10,
                   'GLY': 11, 'LEU': 12, 'TYR': 13, 'THR': 14, 'PRO': 15,
                   'ARG': 16, 'TRP': 17, 'ALA': 18, 'GLN': 19, 'ASN': 20,
                   'SEC': 21, 'UNK': 21, 'ASX': 21, 'GLX': 21, 'XLE': 21,
                   'PYL': 21}

    ppdb = PandasPdb()
    ppdb.read_pdb(path=path)

    # [-1] instead of [1] so a bare file name without '/' also works.
    mol_name = path.rsplit('/', 1)[-1].split('.')[0]

    # Load through read_ply function.
    train = True
    try:
        structure = read_ply('./structures/train/{}.ply'.format(mol_name))
    except FileNotFoundError:
        structure = read_ply('./structures/test/{}.ply'.format(mol_name))
        train = False

    nodes = structure.pos.to(device).float()
    n_nodes = nodes.shape[0]

    atom_df = ppdb.df['ATOM']
    pos = ['x_coord', 'y_coord', 'z_coord']
    atoms = torch.tensor(atom_df[pos].values).to(device).float()
    # Broadcast to (n_atoms, n_nodes, 3); argmin over dim 0 then gives the
    # index of the nearest atom for every node.
    atoms = atoms.view(-1, 1, 3).expand(-1, n_nodes, 3)
    closest_atom = (atoms - nodes).norm(dim=2).argmin(dim=0)

    # Encoded residue of every atom, in ATOM-table order.
    idx_translation = torch.LongTensor(
        atom_df.residue_name.replace(res_encoder)).to(device)

    # Direct indexing yields the same values as the former dense
    # (n_nodes x n_atoms) one-hot product without building that matrix.
    # ``.cpu()`` replaces ``.to(cpu)``, which relied on a bare name ``cpu``.
    structure.residues = idx_translation[closest_atom].cpu()

    return train, structure
def map_dataframe(file, dx, dy, dz, remove_non_ca=0):
    """Construct the new pdb mapping x, y and z averaged coordinates

    Replaces the ATOM coordinates of ``file`` by looking up each atom
    number in ``dx``, ``dy`` and ``dz`` and writes the result next to the
    input file.

    Args:
        file: Path of the PDB file to read.
        dx, dy, dz: Mappings from atom number to the new x, y and z
            coordinate.
        remove_non_ca: 0 keeps every atom and writes ``*_averaged.pdb``;
            1 keeps only atoms named ``CA`` and writes
            ``*_averaged_CA.pdb``.

    Returns:
        ``(newname, ppdb)``: the written path and the modified PandasPdb.

    Raises:
        SystemExit: If ``remove_non_ca`` is neither 0 nor 1 (exit status 1,
            as before, now with a message and before any file is read).
    """
    if remove_non_ca not in (0, 1):
        raise SystemExit(
            "map_dataframe: remove_non_ca must be 0 or 1, got {!r}".format(
                remove_non_ca))

    ppdb = PandasPdb()
    ppdb.read_pdb(file)
    r = ppdb.df['ATOM']
    r.x_coord = r.atom_number.map(dx)
    r.y_coord = r.atom_number.map(dy)
    r.z_coord = r.atom_number.map(dz)

    suffix = "_averaged.pdb"
    if remove_non_ca == 1:
        # Drop every atom that is not an alpha carbon, in place.
        r.drop(r[r['atom_name'] != "CA"].index, inplace=True)
        suffix = "_averaged_CA.pdb"

    newname = os.path.splitext(file)[0] + suffix
    ppdb.to_pdb(newname)
    return newname, ppdb


# Example usage:
#f1 = "/Users/stefanocucuzza/Desktop/Stefano/CHILL/Test_files/Test_average_structures/File1.pdb"
#f1w = "/Users/stefanocucuzza/Desktop/Stefano/CHILL/Test_files/Test_average_structures/File1_wrong.pdb"
#f2 = "/Users/stefanocucuzza/Desktop/Stefano/CHILL/Test_files/Test_average_structures/File2.pdb"
#f3 = "/Users/stefanocucuzza/Desktop/Stefano/CHILL/Test_files/Test_average_structures/File3.pdb"
#
#dx, dy, dz = get_dictionaries([f1, f2])
#ax = average_dict_values(dx)
#ay = average_dict_values(dy)
#az = average_dict_values(dz)
#map_dataframe(f1, ax, ay, az, remove_non_ca=1)
def get_dictionaries(list):
    """For each file in the list, generate three dictionaries linking atom number to x, y and z coordinates

    Every ATOM row of every file is passed to ``set_key`` once per axis,
    keyed by the row's atom number.

    Returns:
        ``(dictio_x, dictio_y, dictio_z)``.
    """
    ppdb = PandasPdb()
    dictio_x, dictio_y, dictio_z = {}, {}, {}
    # Pair each output dictionary with the column that feeds it.
    targets = (
        (dictio_x, 'x_coord'),
        (dictio_y, 'y_coord'),
        (dictio_z, 'z_coord'),
    )
    for file in list:
        ppdb.read_pdb(file)
        for _, row in ppdb.df['ATOM'].iterrows():
            for dictio, column in targets:
                set_key(dictio, row['atom_number'], row[column])
    return dictio_x, dictio_y, dictio_z
def test_fetch_pdb():
    """Test fetch_pdb"""
    ppdb = PandasPdb()
    # Stays None when the download fails, so the checks below are skipped
    # instead of raising NameError.
    txt = None
    try:
        url, txt = ppdb._fetch_pdb('3eiy')
    except HTTPError:
        pass
    if txt:  # skip if PDB down
        # This comparison previously had no ``assert`` and checked nothing.
        assert txt[:100] == three_eiy[:100]
        ppdb.fetch_pdb('3eiy')
        assert ppdb.pdb_text == txt
        assert ppdb.pdb_path == 'http://www.rcsb.org/pdb/files/3eiy.pdb'
def save_pdb_df_to_pdb(df: pd.DataFrame, path: str, gz: bool = False):
    """Saves pdb dataframe to a PDB file.

    :param df: Dataframe to save as PDB
    :type df: pd.DataFrame
    :param path: Path to save PDB file to.
    :type path: str
    :param gz: Whether to gzip the file. Defaults to ``False``.
    :type gz: bool
    """
    ppd = PandasPdb()
    ppd.df["ATOM"] = df
    # Forward the caller's ``gz``; it was previously ignored and the file
    # was always written uncompressed.
    ppd.to_pdb(path=path, records=None, gz=gz, append_newline=True)
    log.info(f"Successfully saved PDB dataframe to {path}")
def __call__(self, pdbid):
    """Write the grid points lying in a shell around a structure.

    Reads ``<args.blindfolder>/<pdbid>.pdb``, lays a unit-spaced grid over
    the bounding box of all ATOM records padded by 5 on each side, keeps
    the grid points within ``halo[1]`` but not within ``halo[0]`` of any
    non-nucleotide atom, and writes them to ``<pdbid>_Grid.ptf`` and
    ``<pdbid>_Grid.xyz`` in the same folder.
    """
    print(pdbid)

    # A. Pdb reading
    # Reading in the pdb for the current conformation
    structure = PandasPdb().read_pdb(
        "%s/%s.pdb" % (args.blindfolder, str(pdbid)))
    atom_df = structure.df['ATOM']
    nucleotide_names = ["A", "T", "C", "G", "U",
                        "DA", "DT", "DC", "DG", "DU"]
    proteindf = atom_df[~atom_df["residue_name"].isin(nucleotide_names)]
    axes = ("x_coord", "y_coord", "z_coord")
    proteinpoint = np.array([proteindf[axis].tolist() for axis in axes]).T
    proteintree = spatial.cKDTree(proteinpoint)

    # B. Grid Creation
    # Define Dimension of Grid box
    x_values = atom_df["x_coord"].tolist()
    y_values = atom_df["y_coord"].tolist()
    z_values = atom_df["z_coord"].tolist()
    maxx, minx = max(x_values) + 5, min(x_values) - 5
    maxy, miny = max(y_values) + 5, min(y_values) - 5
    maxz, minz = max(z_values) + 5, min(z_values) - 5

    # Surface Grid Points
    points = np.mgrid[minx:maxx, miny:maxy, minz:maxz]
    points = np.matrix(points.reshape(3, -1).T)
    tree = spatial.cKDTree(points)

    def grid_indices_within(radius):
        """Return indices of grid points within ``radius`` of any atom."""
        hits = list(tree.query_ball_point(proteinpoint, radius))
        return set(itertools.chain.from_iterable(hits))

    # Index of points within each cutoff
    pointswithincutoff1 = grid_indices_within(halo[0])
    pointswithincutoff2 = grid_indices_within(halo[1])

    # Surface points within the midline and finalise tree for Surface
    # accordingly
    midlineindex = sorted(pointswithincutoff2 - pointswithincutoff1)
    print(pdbid, len(points), len(midlineindex))
    print(points[1].tolist()[0][0])

    with open("%s/%s_Grid.ptf" % (args.blindfolder, pdbid), 'w+') as f:
        for i in midlineindex:
            # Each row of the matrix converts to a nested one-row list.
            row = points[i].tolist()[0]
            f.write('%s\t%.3f\t%.3f\t%.3f\t#\t%s000:X@XX:grid\n'
                    % (pdbid, row[0], row[1], row[2], str("X")))

    XYZ(points[np.array(midlineindex)].tolist(), "Ge",
        "%s/%s_Grid.xyz" % (args.blindfolder, pdbid))
def prepare_docking_grid_and_dock(self):
    """Derive a docking box from the protein and run vina on the ligand.

    The box is centred on the midpoint of the protein's ATOM coordinates
    and spans their extent plus 3 along each axis. After docking, the
    output and log files are moved to ``./results/``; if moving fails the
    error is printed and any leftover file is deleted.
    """
    receptor = './protein_pdbqts/' + self.protein
    # opens protein to calculate grid
    df = PandasPdb().read_pdb(receptor).df['ATOM']

    def center_and_size(column):
        """Return (center, size) of the box along one coordinate column."""
        low = df[column].min()
        high = df[column].max()
        return round((high + low) / 2, 2), round(abs(high - low) + 3, 2)

    cent_x, size_x = center_and_size('x_coord')
    cent_y, size_y = center_and_size('y_coord')
    cent_z, size_z = center_and_size('z_coord')

    # The two messages previously printed each other's values.
    print("Center point of docking grid for {} is as follows: "
          "x: {}, y: {}, z: {}".format(self.protein, cent_x, cent_y, cent_z))
    print("Sizes of docking grid are as follows:"
          "x: {}, y: {}, z: {}".format(size_x, size_y, size_z))

    out_file = self.ligand + '.out'
    log_file = self.ligand + '_docking_log'
    # NOTE(review): the command is built as a shell string; protein/ligand
    # names containing shell metacharacters would be interpreted by the
    # shell.
    os.system(
        'vina --receptor {} --ligand {} --center_x {} --center_y {} --center_z {} --size_x {} --size_y {} --size_z {} --log {} --out {}'
        .format(receptor, './ligand_pdbqts/' + self.ligand, cent_x, cent_y,
                cent_z, size_x, size_y, size_z, log_file, out_file))

    try:  # cleaning
        shutil.move(out_file, './results/')
        shutil.move(log_file, './results/')
    except Exception as e:
        print(e)
        # A file may never have been produced or may already have been
        # moved; only delete what is actually there.
        for leftover in (out_file, log_file):
            if os.path.exists(leftover):
                os.remove(leftover)
def get_residue_dictionary(pdb_file, sites, residues):
    """Collect the residues listed in the SITE records of a PDB file.

    Only the first 1000 lines of the file are scanned. For every SITE
    record, each (name, chain, number) triple found from the fifth field
    on is appended to ``residues`` under the index ``sites[<site name>]``,
    where the site name is the third field of the record.

    Args:
        pdb_file: Path of the PDB file.
        sites: Mapping from site name to the key used inside ``residues``.
        residues: Dict with the entries ``"Residue Name"``,
            ``"Residue Chain"`` and ``"Residue Number"``, each indexable by
            the values of ``sites`` and holding lists; modified in place.

    Returns:
        The same ``residues`` object.
    """
    from biopandas.pdb import PandasPdb
    ppdb = PandasPdb().read_pdb(pdb_file)
    head_lines = ppdb.pdb_text.split('\n')[:1000]

    # Match on the record name: a substring test also picked up other
    # records that merely mention "SITE" in their text.
    site_lines = [line for line in head_lines if line.startswith("SITE")]

    for line in site_lines:
        # A field longer than 4 characters is a chain id fused with the
        # residue number; split off its first character. (The former flag
        # loop iterated over whole lines instead of fields, so this
        # splitting was always applied.)
        fields = []
        for field in line.split():
            if len(field) > 4:
                fields.append(field[0])
                fields.append(field[1:])
            else:
                fields.append(field)

        triples = zip(fields[4::3], fields[5::3], fields[6::3])
        for name, chain, number in triples:
            site_key = sites[fields[2]]
            residues["Residue Name"][site_key].append(name)
            residues["Residue Chain"][site_key].append(chain)
            residues["Residue Number"][site_key].append(number)
    return residues