コード例 #1
0
def main():
    """Print centroid and no-vdW scores for the first listed docking pair.

    Command-line arguments:
        docked_prot_file: text file with one ``protein target start`` triple
            per line; lines starting with ``#`` are skipped.
        raw_root: directory that contains one sub-folder per protein.

    Only the first non-comment line is processed: the loop ends with an
    unconditional ``break``. The accumulated ``scores`` list is printed after
    every pose that is read.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('docked_prot_file', type=str, help='file listing proteins to process')
    parser.add_argument('raw_root', type=str, help='directory where raw data will be placed')
    args = parser.parse_args()

    docking_config = []
    scores = []

    with open(args.docked_prot_file) as listing:
        for entry in listing:
            if entry[0] == '#':
                continue
            protein, target, start = entry.strip().split()

            # Every file of a pair lives under <raw_root>/<protein>/<target>-to-<start>.
            pair_path = os.path.join(
                os.path.join(args.raw_root, protein),
                '{}-to-{}'.format(target, start))
            pose_path = os.path.join(pair_path, 'cartesian_ligand_poses')
            run_name = '{}-to-{}_cartesian'.format(target, start)
            grid_file = os.path.join(pair_path, '{}-to-{}.zip'.format(target, start))
            merged_poses = os.path.join(
                pair_path, '{}-to-{}_cartesian_merge_pv.mae'.format(target, start))

            docking_config.append({'folder': pair_path,
                                   'name': run_name,
                                   'grid_file': grid_file,
                                   'prepped_ligand_file': merged_poses,
                                   'glide_settings': {'num_poses': 1, 'docking_method': 'inplace'}})

            dock_set = Docking_Set()
            results = dock_set.get_docking_gscores(docking_config, mode='multi')
            results_by_ligand = results[run_name]
            for pose_file in results_by_ligand:
                # The first structure stored in the pose file is the one scored.
                pose = list(structure.StructureReader(os.path.join(pose_path, pose_file)))[0]
                scores.append((pose_file,
                               get_centroid(pose),
                               score_no_vdW(results_by_ligand[pose_file][0])))
                print(scores)
            break
コード例 #2
0
    def test_run_rmsd_set(self):
        # Dry-run an RMSD job for two docking folders and verify the shell
        # script that run_rmsd_set writes into the run folder.
        ligand_file = test_directory + '/testfile.mae'
        docking1_dir = test_directory + '/test_docking1'
        docking2_dir = test_directory + '/test_docking2'
        run_dir = test_directory + '/run'

        rmsd_config = [
            {'folder': docking1_dir, 'name': 'test_docking1', 'ligand_file': ligand_file},
            {'folder': docking2_dir, 'name': 'test_docking2', 'ligand_file': ligand_file},
        ]
        run_config = {
            'run_folder': run_dir,
            'group_size': 5,
            'partition': 'rondor',
            'dry_run': True,
        }

        Docking_Set().run_rmsd_set(rmsd_config, run_config)

        # Exactly one artifact is checked: the grouped run script rmsd_0.sh.
        sh_file = test_directory + '/run/rmsd_0.sh'
        self.assertTrue(os.path.isfile(sh_file))

        # Expected script: for each docking, cd in, run rmsd.py, cd back.
        expected_lines = [
            '#!/bin/bash',
            'cd {}'.format(docking1_dir),
            '$SCHRODINGER/run rmsd.py -use_neutral_scaffold -pv second -c test_docking1_rmsd.csv {} test_docking1_pv.maegz'.format(ligand_file),
            'cd {}'.format(run_dir),
            'cd {}'.format(docking2_dir),
            '$SCHRODINGER/run rmsd.py -use_neutral_scaffold -pv second -c test_docking2_rmsd.csv {} test_docking2_pv.maegz'.format(ligand_file),
            'cd {}'.format(run_dir),
        ]

        with open(sh_file, "r") as script:
            for index, written in enumerate(script):
                self.assertEqual(written, expected_lines[index] + '\n')
コード例 #3
0
def run(process, run_path, raw_root, decoy_type, n, max_num_concurrent_jobs):
    """
    Submit docking + rmsd jobs for every pair that has no score file yet.

    A pair is queued when ``<pair>_<decoy_type>.scor`` is absent from its pair
    folder. Collection stops as soon as the queue length equals
    ``max_num_concurrent_jobs``. The number of input groups and the number of
    queued groups are printed.

    :param process: (list) list of all protein, target, start
    :param run_path: (string) directory where script and output files will be written
    :param raw_root: (string) directory where raw data will be placed
    :param decoy_type: (string) pose sub-folder name, also used as file-name suffix
    :param n: (int) number of protein, target, start groups processed in group task
    :param max_num_concurrent_jobs: (int) queue length at which collection stops
    :return:
    """
    glide_settings_template = ('num_poses', 1), ('docking_method', 'inplace')

    pending = []
    print(len(process))
    for protein, target, start in process:
        pair = '{}-to-{}'.format(target, start)
        pair_path = os.path.join(os.path.join(raw_root, protein), pair)
        pose_path = os.path.join(pair_path, decoy_type)
        score_file = os.path.join(pair_path,
                                  '{}_{}.scor'.format(pair, decoy_type))

        already_scored = os.path.exists(score_file)
        if not already_scored:
            job = {}
            job['folder'] = pair_path
            job['name'] = '{}_{}'.format(pair, decoy_type)
            job['grid_file'] = os.path.join(pair_path, '{}.zip'.format(pair))
            job['prepped_ligand_file'] = os.path.join(
                pair_path, '{}_{}_merge_pv.mae'.format(pair, decoy_type))
            job['glide_settings'] = dict(glide_settings_template)
            job['ligand_file'] = os.path.join(pose_path,
                                              '{}_lig0.mae'.format(target))
            pending.append(job)

        # Checked on every iteration, whether or not a job was just queued.
        if len(pending) == max_num_concurrent_jobs:
            break
    print(len(pending))

    run_config = dict(run_folder=run_path,
                      group_size=n,
                      partition='rondror',
                      dry_run=False)

    Docking_Set().run_docking_rmsd_delete(pending, run_config)
コード例 #4
0
    def test_run_docking_rmsd_delete_set(self):
        # Dry-run the combined dock / rmsd / delete pipeline for two dockings
        # and verify the generated all_0.sh script.
        grid_file = test_directory + '/testfile.zip'
        ligand_file = test_directory + '/testfile.mae'
        docking1_dir = test_directory + '/test_docking1'
        docking2_dir = test_directory + '/test_docking2'
        run_dir = test_directory + '/run'

        all_config = [
            {
                'folder': docking1_dir,
                'name': 'test_docking1',
                'grid_file': grid_file,
                'prepped_ligand_file': ligand_file,
                'ligand_file': ligand_file,
                'glide_settings': {'num_poses': 10},
            },
            {
                'folder': docking2_dir,
                'name': 'test_docking2',
                'grid_file': grid_file,
                'prepped_ligand_file': ligand_file,
                'ligand_file': ligand_file,
                'glide_settings': {'num_poses': 10},
            },
        ]

        run_config = {
            'run_folder': run_dir,
            'group_size': 5,
            'partition': 'rondor',
            'dry_run': True,
        }

        Docking_Set().run_docking_rmsd_delete(all_config, run_config)

        sh_file = test_directory + '/run/all_0.sh'
        self.assertTrue(os.path.isfile(sh_file))

        # Expected script: per docking, cd in, dock, compute rmsd, remove the
        # pose viewer file, cd back to the run folder.
        expected_lines = [
            '#!/bin/bash',
            'cd {}'.format(docking1_dir),
            '$SCHRODINGER/glide -WAIT test_docking1.in',
            '$SCHRODINGER/run rmsd.py -use_neutral_scaffold -pv second -c test_docking1_rmsd.csv {} test_docking1_pv.maegz'.format(ligand_file),
            'rm test_docking1_pv.maegz',
            'cd {}'.format(run_dir),
            'cd {}'.format(docking2_dir),
            '$SCHRODINGER/glide -WAIT test_docking2.in',
            '$SCHRODINGER/run rmsd.py -use_neutral_scaffold -pv second -c test_docking2_rmsd.csv {} test_docking2_pv.maegz'.format(ligand_file),
            'rm test_docking2_pv.maegz',
            'cd {}'.format(run_dir),
        ]
        with open(sh_file, "r") as script:
            for index, written in enumerate(script):
                self.assertEqual(written, expected_lines[index] + '\n')
コード例 #5
0
    def run_rmsd_set(self):
        """Submit real RMSD jobs for two dockings and wait for their output.

        Polls ``check_rmsd_set_done`` once a minute and returns as soon as
        every entry reports done. Fails the test if the jobs are still
        unfinished after 15 minutes.
        """
        test_directory = os.getcwd() + '/testrun1'
        docking_config = [{
            'folder': test_directory + '/test_docking1',
            'name': 'test_docking1',
            'grid_file': test_data_directory + '/2B7A.zip',
            'prepped_ligand_file': test_data_directory + '/2W1I_lig.mae',
            'ligand_file': test_data_directory + '/2W1I_lig_correct.mae',
            'glide_settings': {
                'num_poses': 10
            }
        }, {
            'folder': test_directory + '/test_docking2',
            'name': 'test_docking2',
            'grid_file': test_data_directory + '/2B7A.zip',
            'prepped_ligand_file': test_data_directory + '/2W1I_lig.mae',
            'ligand_file': test_data_directory + '/2W1I_lig_correct.mae',
            'glide_settings': {
                'num_poses': 10
            }
        }]

        run_config = {
            'run_folder': test_directory + '/run',
            'group_size': 1,
            'partition': 'rondror',
            'dry_run': False
        }
        dock_set = Docking_Set()
        dock_set.run_rmsd_set(docking_config, run_config)

        # Check at minute 0, 1, ..., 15 so the wait really spans the 15
        # minutes named in the failure message (the previous range(1, 15)
        # gave up after 14), and do not sleep after the final check.
        max_wait_minutes = 15
        for minute in range(max_wait_minutes + 1):
            if all(dock_set.check_rmsd_set_done(docking_config)):
                print("RMSD  Completed")
                return
            if minute < max_wait_minutes:
                print("Waiting for rmsd calculation completion ...")
                time.sleep(60)
        self.fail("Test failed, did not output rmsd within 15 minutes")
コード例 #6
0
    def test_docking_set_inplace(self):
        '''
        In-place docking of a .mae containing multiple ligands, some of which
        have clashes, followed by checks on the reported scores.

        From top level directory
        $SCHRODINGER/run python3 -m unittest docking.test.functional_test.functional_test.TestDocking_Set.test_docking_set_inplace

        Polls for completion once a minute and fails if docking has not
        finished after 15 minutes.
        '''

        test_directory = os.getcwd() + '/testrun3'

        # Note: docking method is set to inplace
        docking_config = [
            {
                'folder': test_directory + '/test_docking1',
                'name': 'test_docking1',
                'grid_file': test_data_directory + '/2B7A.zip',
                'prepped_ligand_file':
                test_data_directory + '/2W1I_3_poses.mae',
                'glide_settings': {
                    'num_poses': 1,
                    'docking_method': 'inplace'
                }
            },
        ]
        run_config = {
            'run_folder': test_directory + '/run',
            'group_size': 5,
            'partition': 'rondror',
            'dry_run': False
        }

        dock_set = Docking_Set()
        dock_set.run_docking_set(docking_config, run_config)

        # Check at minute 0, 1, ..., 15 so the wait really spans the 15
        # minutes named in the failure message (the previous range(1, 15)
        # gave up after 14), and do not sleep after the final check.
        max_wait_minutes = 15
        for minute in range(max_wait_minutes + 1):
            done_list, log_list = dock_set.check_docking_set_done(
                docking_config)
            if all(done_list):
                print("Docking Completed")

                # Note: get the scores; pose1 has purposeful clashes
                results = dock_set.get_docking_gscores(docking_config,
                                                       mode='multi')
                results_by_ligand = results['test_docking1']
                self.assertEqual(results_by_ligand['2W1I_pose2'][0]['GScore'],
                                 -7.07)
                self.assertEqual(results_by_ligand['2W1I_pose1'][0]['GScore'],
                                 10000.00)
                self.assertEqual(results_by_ligand['2W1I_pose1'][0]['vdW'],
                                 14374956.0)
                # Compute the score without vdW terms. The comparison must be
                # two-sided: the earlier "value - 4.89 < 0.0001" form accepted
                # every value below 4.89.
                self.assertAlmostEqual(
                    score_no_vdW(results_by_ligand['2W1I_pose1'][0]),
                    4.89,
                    delta=0.0001)

                return
            if minute < max_wait_minutes:
                print("Waiting for docking completion ...")
                time.sleep(60)
        self.fail("Test failed, did not output docking within 15 minutes")
コード例 #7
0
    def test_run_docking_set(self):
        # Dry-run two dockings and verify both the per-docking glide input
        # files and the grouped run script.
        grid_file = test_directory + '/testfile.zip'
        ligand_file = test_directory + '/testfile.mae'
        docking1_dir = test_directory + '/test_docking1'
        docking2_dir = test_directory + '/test_docking2'
        run_dir = test_directory + '/run'

        docking_config = [
            {
                'folder': docking1_dir,
                'name': 'test_docking1',
                'grid_file': grid_file,
                'prepped_ligand_file': ligand_file,
                'glide_settings': {'num_poses': 10},
            },
            {
                'folder': docking2_dir,
                'name': 'test_docking2',
                'grid_file': grid_file,
                'prepped_ligand_file': ligand_file,
                'glide_settings': {'num_poses': 10},
            },
        ]

        run_config = {
            'run_folder': run_dir,
            'group_size': 5,
            'partition': 'rondor',
            'dry_run': True,
        }

        Docking_Set().run_docking_set(docking_config, run_config)

        # A .in file is expected inside each docking folder ...
        for input_file in (test_directory + '/test_docking1/test_docking1.in',
                           test_directory + '/test_docking2/test_docking2.in'):
            self.assertTrue(os.path.isfile(input_file))

        # ... plus one .sh run file inside the run folder.
        sh_file = test_directory + '/run/dock_0.sh'
        self.assertTrue(os.path.isfile(sh_file))

        expected_lines = [
            '#!/bin/bash',
            'cd {}'.format(docking1_dir),
            '$SCHRODINGER/glide -WAIT test_docking1.in',
            'cd {}'.format(run_dir),
            'cd {}'.format(docking2_dir),
            '$SCHRODINGER/glide -WAIT test_docking2.in',
            'cd {}'.format(run_dir),
        ]
        with open(sh_file, "r") as script:
            for index, written in enumerate(script):
                self.assertEqual(written, expected_lines[index] + '\n')
コード例 #8
0
    :param combind_root: path to the combind root folder
    :return: list of protein name strings
    '''
    proteins = sorted(os.listdir(combind_root))
    proteins = [p for p in proteins if p[0] != '.']
    print(proteins)
    return proteins


# Script entry point: the first command-line argument selects the task
# ('run_dock' or 'check') to perform for every protein in `proteins`.
if __name__ == '__main__':
    max_ligands = 25
    combind_root = '/scratch/PI/rondror/combind/bpp_data'
    output_folder = '/scratch/PI/rondror/combind/flexibility/MAPK14_mut_pred/mut_rmsds'
    # NOTE(review): identical to output_folder and not referenced in the
    # visible part of this block - confirm whether it is still needed.
    result_folder = '/scratch/PI/rondror/combind/flexibility/MAPK14_mut_pred/mut_rmsds'
    proteins = ['MAPK14']
    dock_set = Docking_Set()

    # Raises IndexError when the script is started without a task argument.
    task = sys.argv[1]
    if task == 'run_dock':
        for protein in proteins:
            docking_config = get_docking_info(combind_root, protein, max_ligands, output_folder)
            # Run scripts go to <output_folder>/<protein>/run.
            run_config = {'run_folder': output_folder+'/{}/run'.format(protein),
                          'group_size': 15,
                          'partition': 'owners',
                          'dry_run': False}
            print(protein)
            # incomplete_only=True presumably restricts the submission to
            # dockings that have not finished yet - verify in Docking_Set.
            dock_set.run_docking_rmsd_delete(docking_config, run_config, incomplete_only=True)

    if task == 'check':
        for protein in proteins:
            # Only the config lookup is visible here; what is done with it
            # afterwards is not shown in this excerpt.
            docking_config = get_docking_info(combind_root, protein, max_ligands, output_folder)
コード例 #9
0
def check(docked_prot_file, raw_root, decoy_type):
    """
    Report which pairs are missing score or rmsd output.

    A pair counts as *missing* when its ``.scor`` file does not exist. It
    counts as *incomplete* when the ``.scor`` file exists but either the
    ``_rmsd.csv`` file is absent or the score file does not hold exactly 100
    ligand entries. The offending path (or the entry count) is printed as it
    is found, followed by summary totals and the incomplete list.

    :param docked_prot_file: (string) file listing proteins to process
    :param raw_root: (string) directory where raw data will be placed
    :param decoy_type: (string) pose sub-folder name, also used as file-name suffix
    :return:
    """
    expected_num_ligands = 100

    counter = 0
    missing = []
    incomplete = []
    with open(docked_prot_file) as listing:
        for entry in tqdm(listing, desc='protein, target, start groups'):
            if entry[0] == '#':
                continue
            protein, target, start = entry.strip().split()
            group = (protein, target, start)
            pair = '{}-to-{}'.format(target, start)
            counter += 1

            pair_path = os.path.join(os.path.join(raw_root, protein), pair)
            pose_path = os.path.join(pair_path, decoy_type)
            run_name = '{}_{}'.format(pair, decoy_type)
            score_file = os.path.join(
                pair_path, '{}_{}.scor'.format(pair, decoy_type))
            rmsd_file = os.path.join(
                pair_path, '{}_{}_rmsd.csv'.format(pair, decoy_type))

            docking_config = [{
                'folder': pair_path,
                'name': run_name,
                'grid_file': os.path.join(pair_path, '{}.zip'.format(pair)),
                'prepped_ligand_file': os.path.join(
                    pair_path, '{}_{}_merge_pv.mae'.format(pair, decoy_type)),
                'glide_settings': {
                    'num_poses': 1,
                    'docking_method': 'inplace'
                },
                'ligand_file': os.path.join(pose_path,
                                            '{}_lig0.mae'.format(target)),
            }]

            dock_set = Docking_Set()

            # No score file at all: docking never produced output.
            if not os.path.exists(score_file):
                print(score_file)
                missing.append(group)
                continue

            # Scores exist but the rmsd step has not written its csv.
            if not os.path.exists(rmsd_file):
                print(rmsd_file)
                incomplete.append(group)
                continue

            # Both files exist: make sure every ligand was scored.
            results = dock_set.get_docking_gscores(docking_config,
                                                   mode='multi')
            num_scored = len(results[run_name].keys())
            if num_scored != expected_num_ligands:
                print(num_scored, 100)
                incomplete.append(group)
                continue

        print('Missing', len(missing), '/', counter)
        print('Incomplete', len(incomplete), '/', counter - len(missing))
        print(incomplete)
コード例 #10
0
def run_group(grouped_files, raw_root, index, rmsd_cutoff, decoy_type):
    """
    Collect per-pose data for one group of pairs and clean up docking files.

    For every ``(protein, target, start)`` in ``grouped_files[index]`` this
    reads the mcss value and the rmsd table, fetches the docking scores,
    hands one row per pose to ``to_df`` and finally deletes the pair's
    ``.in``, ``.log``, ``_pv.maegz`` and ``_state.json`` files when present.

    :param grouped_files: (list) groups of protein, target, start triples
    :param raw_root: (string) directory that holds one folder per protein
    :param index: (int) position in grouped_files of the group to process
    :param rmsd_cutoff: rmsd values above this are cubed in the modified rmsd column
    :param decoy_type: (string) pose sub-folder name, also used as file-name suffix
    :return:
    """
    # Leftover docking artifacts, removed in this order once data is saved.
    disposable_templates = (
        '{}_{}.in',
        '{}_{}.log',
        '{}_{}_pv.maegz',
        '{}_{}_state.json',
    )

    for protein, target, start in grouped_files[index]:
        pair = '{}-to-{}'.format(target, start)
        pair_path = os.path.join(os.path.join(raw_root, protein), pair)
        print(pair_path)
        pose_path = os.path.join(pair_path, decoy_type)
        run_name = '{}_{}'.format(pair, decoy_type)

        # mcss: fifth comma-separated field on the first line of the csv
        with open('{}/{}_mcss.csv'.format(pair_path, pair)) as mcss_file:
            first_row = mcss_file.readline().strip().split(',')
            mcss = int(first_row[4])

        # rmsd table, looked up per pose by its 'Title' column
        rmsds = pd.read_csv('{}/{}_{}_rmsd.csv'.format(pair_path, pair,
                                                       decoy_type))

        # physics scores
        docking_config = [{
            'folder': pair_path,
            'name': run_name,
            'grid_file': os.path.join(pair_path, '{}.zip'.format(pair)),
            'prepped_ligand_file': os.path.join(
                pair_path, '{}_{}_merge_pv.mae'.format(pair, decoy_type)),
            'glide_settings': {
                'num_poses': 1,
                'docking_method': 'inplace'
            },
            'ligand_file': os.path.join(pose_path,
                                        '{}_lig0.mae'.format(target)),
        }]
        results = Docking_Set().get_docking_gscores(docking_config,
                                                    mode='multi')
        pose_results = results[run_name]

        pair_data = []
        for pose_name in pose_results:
            top_entry = pose_results[pose_name][0]
            glide_score = top_entry['Score']
            score_without_vdw = score_no_vdW(top_entry)
            rmsd = rmsds[rmsds['Title'] == pose_name]['RMSD'].iloc[0]
            modified_rmsd = rmsd**3 if rmsd > rmsd_cutoff else rmsd
            pair_data.append([
                protein, start, pose_name[:-4], rmsd, modified_rmsd, mcss,
                glide_score, score_without_vdw
            ])

        to_df(pair_data, pair_path, pair, decoy_type)

        # The mcss csv, rmsd csv, .scor and grid .zip files are kept.
        for template in disposable_templates:
            leftover = os.path.join(pair_path,
                                    template.format(pair, decoy_type))
            if os.path.exists(leftover):
                os.remove(leftover)