def main():
    """Print no-vdW scores for the poses of the first listed protein pair.

    Command-line arguments:
        docked_prot_file: text file with one whitespace-separated
            ``protein target start`` triple per line; lines starting with
            ``#`` and blank lines are ignored.
        raw_root: root directory holding ``<protein>/<target>-to-<start>``
            folders.

    For the first usable line, the Glide scores of the in-place docking run
    named ``<target>-to-<start>_cartesian`` are read, and a list of
    ``(pose file, centroid, score without vdW)`` tuples is printed.

    NOTE(review): the loop stops after the first entry (``break``), which
    looks like a deliberate debugging shortcut - confirm before removing.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('docked_prot_file', type=str,
                        help='file listing proteins to process')
    parser.add_argument('raw_root', type=str,
                        help='directory where raw data will be placed')
    args = parser.parse_args()

    docking_config = []
    scores = []
    with open(args.docked_prot_file) as fp:
        for line in fp:
            # Comment lines are skipped; blank lines are skipped as well so
            # they no longer break the three-way unpacking below.
            if line.startswith('#') or not line.strip():
                continue
            protein, target, start = line.strip().split()
            pair = '{}-to-{}'.format(target, start)
            run_name = '{}_cartesian'.format(pair)
            pair_path = os.path.join(args.raw_root, protein, pair)
            pose_path = os.path.join(pair_path, 'cartesian_ligand_poses')
            docking_config.append({
                'folder': pair_path,
                'name': run_name,
                'grid_file': os.path.join(pair_path, '{}.zip'.format(pair)),
                'prepped_ligand_file': os.path.join(
                    pair_path, '{}_merge_pv.mae'.format(run_name)),
                'glide_settings': {
                    'num_poses': 1,
                    'docking_method': 'inplace'
                },
            })
            dock_set = Docking_Set()
            results = dock_set.get_docking_gscores(docking_config,
                                                   mode='multi')
            results_by_ligand = results[run_name]
            for pose_file in results_by_ligand:
                # Only the first structure stored in each pose file is used.
                s = list(structure.StructureReader(
                    os.path.join(pose_path, pose_file)))[0]
                scores.append((pose_file, get_centroid(s),
                               score_no_vdW(results_by_ligand[pose_file][0])))
            print(scores)
            break
def test_run_rmsd_set(self):
    """Dry-run run_rmsd_set for two docking folders and check the script.

    Expects ``<test_directory>/run/rmsd_0.sh`` to exist and every line it
    contains to match the expected command at the same position.
    """
    run_dir = test_directory + '/run'
    ligand = test_directory + '/testfile.mae'
    names = ('test_docking1', 'test_docking2')

    rmsd_config = [
        {'folder': test_directory + '/' + name,
         'name': name,
         'ligand_file': ligand}
        for name in names
    ]
    run_config = dict(run_folder=run_dir, group_size=5, partition='rondor',
                      dry_run=True)

    Docking_Set().run_rmsd_set(rmsd_config, run_config)

    # A single shell script is expected inside the run folder.
    sh_file = run_dir + '/rmsd_0.sh'
    self.assertTrue(os.path.isfile(sh_file))

    # Expected content: shebang, then one cd / rmsd / cd-back triple per
    # docking folder.
    expected = ['#!/bin/bash']
    for name in names:
        expected.extend([
            'cd {}'.format(test_directory + '/' + name),
            '$SCHRODINGER/run rmsd.py -use_neutral_scaffold -pv second '
            '-c {0}_rmsd.csv {1} {0}_pv.maegz'.format(name, ligand),
            'cd {}'.format(run_dir),
        ])

    with open(sh_file, "r") as script:
        for position, written in enumerate(script):
            self.assertEqual(written, expected[position] + '\n')
def run(process, run_path, raw_root, decoy_type, n, max_num_concurrent_jobs):
    """
    Submit docking + rmsd jobs for every pair that has no .scor file yet.

    :param process: (list) (protein, target, start) triples to consider
    :param run_path: (string) passed to the docking set as 'run_folder'
    :param raw_root: (string) root directory containing one folder per protein
    :param decoy_type: (string) name of the pose subfolder; also used as the
        suffix of the per-pair file names
    :param n: (int) passed to the docking set as 'group_size'
    :param max_num_concurrent_jobs: (int) collecting stops as soon as this
        many configurations have been gathered
    :return: None
    """
    pending = []
    print(len(process))

    for protein, target, start in process:
        pair = '{}-to-{}'.format(target, start)
        job_name = '{}_{}'.format(pair, decoy_type)
        pair_path = os.path.join(raw_root, protein, pair)
        pose_path = os.path.join(pair_path, decoy_type)

        # An existing score file means this pair was already processed.
        score_file = os.path.join(pair_path, '{}.scor'.format(job_name))
        if not os.path.exists(score_file):
            pending.append({
                'folder': pair_path,
                'name': job_name,
                'grid_file': os.path.join(pair_path, '{}.zip'.format(pair)),
                'prepped_ligand_file': os.path.join(
                    pair_path, '{}_merge_pv.mae'.format(job_name)),
                'glide_settings': {
                    'num_poses': 1,
                    'docking_method': 'inplace'
                },
                'ligand_file': os.path.join(pose_path,
                                            '{}_lig0.mae'.format(target)),
            })

        if len(pending) == max_num_concurrent_jobs:
            break

    print(len(pending))

    run_config = dict(run_folder=run_path, group_size=n, partition='rondror',
                      dry_run=False)
    Docking_Set().run_docking_rmsd_delete(pending, run_config)
def test_run_docking_rmsd_delete_set(self):
    """Dry-run run_docking_rmsd_delete for two folders and check the script.

    Expects ``<test_directory>/run/all_0.sh`` to exist and every line it
    contains to match the expected command at the same position.
    """
    run_dir = test_directory + '/run'
    grid = test_directory + '/testfile.zip'
    ligand = test_directory + '/testfile.mae'
    names = ('test_docking1', 'test_docking2')

    all_config = [
        {'folder': test_directory + '/' + name,
         'name': name,
         'grid_file': grid,
         'prepped_ligand_file': ligand,
         'ligand_file': ligand,
         'glide_settings': {'num_poses': 10}}
        for name in names
    ]
    run_config = dict(run_folder=run_dir, group_size=5, partition='rondor',
                      dry_run=True)

    Docking_Set().run_docking_rmsd_delete(all_config, run_config)

    sh_file = run_dir + '/all_0.sh'
    self.assertTrue(os.path.isfile(sh_file))

    # Expected content: shebang, then per folder: cd in, dock, rmsd,
    # delete the pose viewer file, cd back to the run folder.
    expected = ['#!/bin/bash']
    for name in names:
        expected.extend([
            'cd {}'.format(test_directory + '/' + name),
            '$SCHRODINGER/glide -WAIT {}.in'.format(name),
            '$SCHRODINGER/run rmsd.py -use_neutral_scaffold -pv second '
            '-c {0}_rmsd.csv {1} {0}_pv.maegz'.format(name, ligand),
            'rm {}_pv.maegz'.format(name),
            'cd {}'.format(run_dir),
        ])

    with open(sh_file, "r") as script:
        for position, written in enumerate(script):
            self.assertEqual(written, expected[position] + '\n')
def run_rmsd_set(self):
    """Launch a real (non dry-run) rmsd calculation and wait for its output.

    Two identical configurations are submitted under ``<cwd>/testrun1``.
    Completion is polled once a minute; the test fails if the rmsd output is
    still missing after 15 polls (15 minutes).
    """
    test_directory = os.getcwd() + '/testrun1'
    docking_config = [{
        'folder': test_directory + '/test_docking1',
        'name': 'test_docking1',
        'grid_file': test_data_directory + '/2B7A.zip',
        'prepped_ligand_file': test_data_directory + '/2W1I_lig.mae',
        'ligand_file': test_data_directory + '/2W1I_lig_correct.mae',
        'glide_settings': {
            'num_poses': 10
        }
    }, {
        'folder': test_directory + '/test_docking2',
        'name': 'test_docking2',
        'grid_file': test_data_directory + '/2B7A.zip',
        'prepped_ligand_file': test_data_directory + '/2W1I_lig.mae',
        'ligand_file': test_data_directory + '/2W1I_lig_correct.mae',
        'glide_settings': {
            'num_poses': 10
        }
    }]
    run_config = {
        'run_folder': test_directory + '/run',
        'group_size': 1,
        'partition': 'rondror',
        'dry_run': False
    }
    dock_set = Docking_Set()
    dock_set.run_rmsd_set(docking_config, run_config)

    # 15 one-minute polls, so the wait really matches the failure message
    # (the previous range(1, 15) gave up after 14).
    for _ in range(15):
        if all(dock_set.check_rmsd_set_done(docking_config)):
            print("RMSD Completed")
            return
        print("Waiting for rmsd calculation completion ...")
        time.sleep(60)
    self.fail("Test failed, did not output rmsd within 15 minutes")
def test_docking_set_inplace(self):
    '''
    Functional test of in-place docking on a .mae containing multiple
    ligands, some of which have clashes.

    From top level directory:
    $SCHRODINGER/run python3 -m unittest docking.test.functional_test.functional_test.TestDocking_Set.test_docking_set_inplace

    The docking job is submitted for real (dry_run is False) and polled once
    a minute; the test fails if it has not finished after 15 polls.
    '''
    test_directory = os.getcwd() + '/testrun3'
    # Note: docking method is set to inplace
    docking_config = [
        {
            'folder': test_directory + '/test_docking1',
            'name': 'test_docking1',
            'grid_file': test_data_directory + '/2B7A.zip',
            'prepped_ligand_file': test_data_directory + '/2W1I_3_poses.mae',
            'glide_settings': {
                'num_poses': 1,
                'docking_method': 'inplace'
            }
        },
    ]
    run_config = {
        'run_folder': test_directory + '/run',
        'group_size': 5,
        'partition': 'rondror',
        'dry_run': False
    }
    dock_set = Docking_Set()
    dock_set.run_docking_set(docking_config, run_config)

    # 15 one-minute polls, so the wait really matches the failure message
    # (the previous range(1, 15) gave up after 14).
    for _ in range(15):
        done_list, _log_list = dock_set.check_docking_set_done(
            docking_config)
        if all(done_list):
            print("Docking Completed")
            # Note: get the scores; pose1 has purposeful clashes
            results = dock_set.get_docking_gscores(docking_config,
                                                   mode='multi')
            results_by_ligand = results['test_docking1']
            self.assertEqual(results_by_ligand['2W1I_pose2'][0]['GScore'],
                             -7.07)
            self.assertEqual(results_by_ligand['2W1I_pose1'][0]['GScore'],
                             10000.00)
            self.assertEqual(results_by_ligand['2W1I_pose1'][0]['vdW'],
                             14374956.0)
            # Score without vdW terms. The absolute difference is compared
            # so that a value far BELOW 4.89 is rejected as well (the
            # one-sided check accepted any such value).
            self.assertLess(
                abs(score_no_vdW(results_by_ligand['2W1I_pose1'][0]) - 4.89),
                0.0001)
            return
        print("Waiting for docking completion ...")
        time.sleep(60)
    self.fail("Test failed, did not output docking within 15 minutes")
def test_run_docking_set(self):
    """Dry-run run_docking_set for two folders and check the written files.

    Expects one ``.in`` file inside each docking folder and a
    ``dock_0.sh`` script inside the run folder whose lines match the
    expected commands position by position.
    """
    run_dir = test_directory + '/run'
    grid = test_directory + '/testfile.zip'
    ligand = test_directory + '/testfile.mae'
    names = ('test_docking1', 'test_docking2')

    docking_config = [
        {'folder': test_directory + '/' + name,
         'name': name,
         'grid_file': grid,
         'prepped_ligand_file': ligand,
         'glide_settings': {'num_poses': 10}}
        for name in names
    ]
    run_config = dict(run_folder=run_dir, group_size=5, partition='rondor',
                      dry_run=True)

    Docking_Set().run_docking_set(docking_config, run_config)

    # Each docking folder must have received its Glide input file.
    for name in names:
        self.assertTrue(os.path.isfile(
            test_directory + '/' + name + '/' + name + '.in'))

    # The run folder must hold the shell script with the expected lines.
    sh_file = run_dir + '/dock_0.sh'
    self.assertTrue(os.path.isfile(sh_file))

    expected = ['#!/bin/bash']
    for name in names:
        expected.extend([
            'cd {}'.format(test_directory + '/' + name),
            '$SCHRODINGER/glide -WAIT {}.in'.format(name),
            'cd {}'.format(run_dir),
        ])

    with open(sh_file, "r") as script:
        for position, written in enumerate(script):
            self.assertEqual(written, expected[position] + '\n')
:param combind_root: path to the combind root folder :return: list of protein name strings ''' proteins = sorted(os.listdir(combind_root)) proteins = [p for p in proteins if p[0] != '.'] print(proteins) return proteins if __name__ == '__main__': max_ligands = 25 combind_root = '/scratch/PI/rondror/combind/bpp_data' output_folder = '/scratch/PI/rondror/combind/flexibility/MAPK14_mut_pred/mut_rmsds' result_folder = '/scratch/PI/rondror/combind/flexibility/MAPK14_mut_pred/mut_rmsds' proteins = ['MAPK14'] dock_set = Docking_Set() task = sys.argv[1] if task == 'run_dock': for protein in proteins: docking_config = get_docking_info(combind_root, protein, max_ligands, output_folder) run_config = {'run_folder': output_folder+'/{}/run'.format(protein), 'group_size': 15, 'partition': 'owners', 'dry_run': False} print(protein) dock_set.run_docking_rmsd_delete(docking_config, run_config, incomplete_only=True) if task == 'check': for protein in proteins: docking_config = get_docking_info(combind_root, protein, max_ligands, output_folder)
def check(docked_prot_file, raw_root, decoy_type, expected_ligands=100):
    """
    check if scores and rmsds were calculated

    An entry is reported as "missing" when its .scor file does not exist,
    and as "incomplete" when the rmsd csv does not exist or when the number
    of scored ligands differs from expected_ligands. Summary counts and the
    list of incomplete entries are printed at the end.

    :param docked_prot_file: (string) file listing proteins to process, one
        whitespace-separated "protein target start" triple per line; lines
        starting with '#' and blank lines are ignored
    :param raw_root: (string) directory where raw data will be placed
    :param decoy_type: (string) name of the pose subfolder; also used as the
        suffix of the per-pair file names
    :param expected_ligands: (int) number of scored ligands a complete pair
        must have (defaults to the previously hard-coded 100)
    :return:
    """
    counter = 0
    missing = []
    incomplete = []
    with open(docked_prot_file) as fp:
        for line in tqdm(fp, desc='protein, target, start groups'):
            # Blank lines are skipped too, so they no longer break the
            # three-way unpacking below.
            if line.startswith('#') or not line.strip():
                continue
            protein, target, start = line.strip().split()
            entry = (protein, target, start)
            pair = '{}-to-{}'.format(target, start)
            job_name = '{}_{}'.format(pair, decoy_type)
            counter += 1
            pair_path = os.path.join(raw_root, protein, pair)
            pose_path = os.path.join(pair_path, decoy_type)

            score_file = os.path.join(pair_path, '{}.scor'.format(job_name))
            if not os.path.exists(score_file):
                print(score_file)
                missing.append(entry)
                continue

            rmsd_file = os.path.join(pair_path,
                                     '{}_rmsd.csv'.format(job_name))
            if not os.path.exists(rmsd_file):
                print(rmsd_file)
                incomplete.append(entry)
                continue

            # Only entries with both output files need the docking set to
            # read back their scores.
            docking_config = [{
                'folder': pair_path,
                'name': job_name,
                'grid_file': os.path.join(pair_path, '{}.zip'.format(pair)),
                'prepped_ligand_file': os.path.join(
                    pair_path, '{}_merge_pv.mae'.format(job_name)),
                'glide_settings': {
                    'num_poses': 1,
                    'docking_method': 'inplace'
                },
                'ligand_file': os.path.join(pose_path,
                                            '{}_lig0.mae'.format(target)),
            }]
            dock_set = Docking_Set()
            results = dock_set.get_docking_gscores(docking_config,
                                                   mode='multi')
            results_by_ligand = results[job_name]
            if len(results_by_ligand) != expected_ligands:
                print(len(results_by_ligand), expected_ligands)
                incomplete.append(entry)

    print('Missing', len(missing), '/', counter)
    print('Incomplete', len(incomplete), '/', counter - len(missing))
    print(incomplete)
def run_group(grouped_files, raw_root, index, rmsd_cutoff, decoy_type):
    """
    Collect per-pose data for one group of pairs and clean up docking files.

    For every (protein, target, start) in grouped_files[index] the mcss
    value, the rmsd table and the docking scores are read, one row per pose
    is handed to to_df, and the Glide .in / .log / _pv.maegz / _state.json
    files of the pair are deleted if present.

    :param grouped_files: indexable collection of groups of
        (protein, target, start) triples
    :param raw_root: (string) root directory containing one folder per protein
    :param index: key/position of the group to process
    :param rmsd_cutoff: rmsds above this value are cubed in the
        "modified rmsd" column
    :param decoy_type: (string) name of the pose subfolder; also used as the
        suffix of the per-pair file names
    :return: None
    """
    for protein, target, start in grouped_files[index]:
        pair = '{}-to-{}'.format(target, start)
        job_name = '{}_{}'.format(pair, decoy_type)
        pair_path = os.path.join(raw_root, protein, pair)
        print(pair_path)
        pose_path = os.path.join(pair_path, decoy_type)
        rows = []

        # mcss: fifth comma-separated field of the first line
        with open('{}/{}_mcss.csv'.format(pair_path, pair)) as mcss_file:
            first_line = mcss_file.readline()
            mcss = int(first_line.strip().split(',')[4])

        # rmsd table, looked up by pose title below
        rmsds = pd.read_csv('{}/{}_{}_rmsd.csv'.format(pair_path, pair,
                                                       decoy_type))

        # physics scores of the in-place docking run
        docking_config = [{
            'folder': pair_path,
            'name': job_name,
            'grid_file': os.path.join(pair_path, '{}.zip'.format(pair)),
            'prepped_ligand_file': os.path.join(
                pair_path, '{}_merge_pv.mae'.format(job_name)),
            'glide_settings': {
                'num_poses': 1,
                'docking_method': 'inplace'
            },
            'ligand_file': os.path.join(pose_path,
                                        '{}_lig0.mae'.format(target)),
        }]
        results = Docking_Set().get_docking_gscores(docking_config,
                                                    mode='multi')
        pose_results = results[job_name]

        for pose_name in pose_results:
            top_entry = pose_results[pose_name][0]
            glide_score = top_entry['Score']
            score_without_vdw = score_no_vdW(top_entry)
            rmsd = rmsds[rmsds['Title'] == pose_name]['RMSD'].iloc[0]
            modified_rmsd = rmsd**3 if rmsd > rmsd_cutoff else rmsd
            rows.append([
                protein, start, pose_name[:-4], rmsd, modified_rmsd, mcss,
                glide_score, score_without_vdw
            ])

        to_df(rows, pair_path, pair, decoy_type)

        # Remove docking by-products. The mcss csv, merged pose file, rmsd
        # csv, .scor file and grid .zip are left in place.
        for suffix in ('.in', '.log', '_pv.maegz', '_state.json'):
            leftover = os.path.join(pair_path, job_name + suffix)
            if os.path.exists(leftover):
                os.remove(leftover)