def rotamers(rot_s, rot_r, s, r, cutoff):
    """Collect RMSD statistics over the library rotamers of one residue.

    Every rotamer built for ``rot_r`` is applied in turn.  For each one the
    in-place RMSD between the atoms of ``r`` (in ``s``) and the atoms of
    ``rot_r`` (in ``rot_s``) is recorded.  Rotamers whose clash volume against
    the remaining atoms of ``rot_s`` (chain ``'L'`` excluded) is below
    ``cutoff`` are additionally recorded in a second, "clash-free" list.

    Args:
        rot_s: Structure handed to the rotamer library together with the
            first atom of ``rot_r``; presumably the structure that owns
            ``rot_r`` -- confirm against the caller.
        rot_r: Residue whose rotamers are enumerated.
        s: Reference structure used on the reference side of the RMSD.
        r: Reference residue; its atom list is the RMSD reference atom set.
        cutoff: Clash-volume threshold; rotamers strictly below it count as
            clash-free.

    Returns:
        ``(num_rots, avg_rot_rmsd, num_r_rots, avg_r_rot_rmsd)``: the number
        of rotamers and the ``safeAvg`` of their RMSDs, followed by the same
        two values restricted to clash-free rotamers.  All four are 0 when
        anything inside the computation raises.
    """
    # NOTE(review): this function carries the same name as the ``rotamers``
    # module it appears to call.  Unless that module is bound under another
    # name, ``rotamers.Rotamers`` below resolves to this function, raises
    # AttributeError, and the handler turns that into the all-zero result.
    # Confirm how the module is imported before trusting the output.
    try:
        rotamer_lib = rotamers.Rotamers(rot_s, list(rot_r.atom)[0])
        a_ls = r.getAtomList()
        r_rmsd_ls = []
        rmsd_ls = []

        for k, rotamer in enumerate(list(rotamer_lib.rotamers)):
            rotamer.apply()
            rot_a_ls = rot_r.getAtomList()
            # Set lookup keeps the filter below linear in the atom count.
            rot_a_set = set(rot_a_ls)
            no_r_a_ls = [
                a.index for a in rot_s.atom
                if a.index not in rot_a_set and a.chain != 'L'
            ]
            clash = steric_clash.clash_volume(rot_s, rot_a_ls, rot_s,
                                              no_r_a_ls)

            # Debug output for one hard-coded residue (LEU 167).
            if 'LEU' in r.pdbres and r.resnum == 167:
                print(k, clash)

            # One RMSD evaluation per rotamer, shared by both lists.
            rmsd_val = rmsd.calculate_in_place_rmsd(s, a_ls, rot_s, rot_a_ls)
            if clash < cutoff:
                r_rmsd_ls.append(rmsd_val)
            rmsd_ls.append(rmsd_val)

        num_rots = len(rotamer_lib.rotamers)
        avg_rot_rmsd = safeAvg(num_rots, rmsd_ls)
        num_r_rots = len(r_rmsd_ls)
        avg_r_rot_rmsd = safeAvg(num_r_rots, r_rmsd_ls)

    except Exception as e:
        # Failures for ALA/GLY/PRO are silenced; any other failure is printed.
        if 'ALA' not in r.pdbres and 'GLY' not in r.pdbres and 'PRO' not in r.pdbres:
            print(e)

        num_rots = 0
        avg_rot_rmsd = 0
        num_r_rots = 0
        avg_r_rot_rmsd = 0

    return (num_rots, avg_rot_rmsd, num_r_rots, avg_r_rot_rmsd)
def run_test_search(protein, target, start, raw_root, cutoff,
                    rotation_search_step_size, pair_path, no_prot_h,
                    pocket_only, get_time):
    """Self-check of the pose search against one randomly chosen pose.

    A random rotation (drawn from the first five angles of the search range)
    and a random grid offset in [-6, 6] per axis are picked.  ``run_search``
    is asked, in test mode, for the conformer at that pose; the same
    translation and rotation are then applied here to the first conformer of
    the aligned conformer file, and the two results are compared by in-place
    RMSD.  The outcome is printed; nothing is returned.
    """
    step = rotation_search_step_size
    candidate_angles = list(range(-30, 30 + step, step))[:5]
    # Draw order (x, y, z, then the three offsets) fixes the random stream.
    x_rot, y_rot, z_rot = (random.choice(candidate_angles) for _ in range(3))
    offsets = list(range(-6, 7))
    grid = [[random.choice(offsets) for _ in range(3)]]

    conformer = run_search(protein, target, start, 0, raw_root, get_time,
                           cutoff, rotation_search_step_size, grid, no_prot_h,
                           pocket_only, True, x_rot, y_rot, z_rot)

    # Rebuild the expected pose independently of run_search.
    reference_path = os.path.join(
        pair_path, "aligned_to_start_without_hydrogen_conformers.mae")
    reference = list(structure.StructureReader(reference_path))[0]
    shift_x, shift_y, shift_z = grid[0]
    translate_structure(reference, shift_x, shift_y, shift_z)
    pivot = list(get_centroid(reference))
    rotated = rotate_structure(reference.getXYZ(copy=False),
                               math.radians(x_rot), math.radians(y_rot),
                               math.radians(z_rot), pivot)
    reference.setXYZ(rotated)

    rmsd_val = rmsd.calculate_in_place_rmsd(conformer,
                                            conformer.getAtomIndices(),
                                            reference,
                                            reference.getAtomIndices())
    if abs(rmsd_val) == 0:
        print("Search works properly", rmsd_val)
        return

    print("x_rot =", x_rot, "y_rot =", y_rot, "z_rot =", z_rot)
    print("RMSD =", rmsd_val, "but RMSD should equal 0")
def main():
    """Parse the command line and run the task named by ``task``.

    The positional ``task`` string selects exactly one branch of the dispatch
    chain below (``conformer_all``, ``conformer_group``, ``conformer_check``,
    ``align_all``, ``align_group``, ``align_check``, ``align_combine``,
    ``run_search``, ``search``, ``check_search``, ``test_search``,
    ``get_grid_data``, ``combine_search_data``, ``get_dist``,
    ``test_rotate_translate``, ``get_rmsd``, ``check_rotation``).  A task
    name that matches none of them does nothing.

    Side effects common to every task: the global ``random`` generator is
    seeded with 0 and ``run_path`` is created if it does not exist.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('task', type=str, help='either align or search')
    parser.add_argument('docked_prot_file',
                        type=str,
                        help='file listing proteins to process')
    parser.add_argument(
        'run_path',
        type=str,
        help='directory where script and output files will be written')
    parser.add_argument('raw_root',
                        type=str,
                        help='directory where raw data will be placed')
    parser.add_argument('--protein', type=str, default='', help='protein name')
    parser.add_argument('--target',
                        type=str,
                        default='',
                        help='target ligand name')
    parser.add_argument('--start',
                        type=str,
                        default='',
                        help='start ligand name')
    parser.add_argument('--align_n',
                        type=int,
                        default=10,
                        help='number of alignments processed in each job')
    parser.add_argument('--rotation_search_step_size',
                        type=int,
                        default=1,
                        help='step size between each angle '
                        'checked, in degrees')
    parser.add_argument('--index',
                        type=int,
                        default=-1,
                        help='grid point group index')
    parser.add_argument(
        '--rmsd_cutoff',
        type=int,
        default=2,
        help='rmsd accuracy cutoff between predicted ligand pose '
        'and true ligand pose')
    parser.add_argument('--num_conformers',
                        type=int,
                        default=300,
                        help='maximum number of conformers considered')
    parser.add_argument('--grid_size',
                        type=int,
                        default=6,
                        help='grid size in positive and negative x, y, z '
                        'directions')
    parser.add_argument('--grid_n',
                        type=int,
                        default=30,
                        help='number of grid_points processed in each job')
    parser.add_argument('--time', dest='get_time', action='store_true')
    parser.add_argument('--no_time', dest='get_time', action='store_false')
    parser.set_defaults(get_time=False)
    parser.add_argument('--remove_prot_h',
                        dest='no_prot_h',
                        action='store_true')
    parser.add_argument('--keep_prot_h',
                        dest='no_prot_h',
                        action='store_false')
    parser.set_defaults(no_prot_h=False)
    parser.add_argument('--prot_pocket_only',
                        dest='pocket_only',
                        action='store_true')
    parser.add_argument('--all_prot', dest='pocket_only', action='store_false')
    parser.set_defaults(pocket_only=False)

    args = parser.parse_args()

    # Fixed seed: the shuffles and random test poses below are reproducible.
    random.seed(0)

    # makedirs also creates missing parent directories and tolerates another
    # process creating the directory between the check and the call.
    if not os.path.exists(args.run_path):
        os.makedirs(args.run_path, exist_ok=True)

    # Paths for the single protein / ligand pair named on the command line.
    pair = '{}-to-{}'.format(args.target, args.start)
    protein_path = os.path.join(args.raw_root, args.protein)
    pair_path = os.path.join(protein_path, pair)

    if args.task == 'conformer_all':
        process = get_prots(args.docked_prot_file)
        random.shuffle(process)
        run_conformer_all(process, args.raw_root, args.run_path,
                          args.docked_prot_file)

    elif args.task == 'conformer_group':
        target_lig_file = os.path.join(pair_path, 'ligand_poses',
                                       '{}_lig0.mae'.format(args.target))
        gen_ligand_conformers(target_lig_file, pair_path, args.num_conformers)
        # Remove the log file left next to the conformers, if there is one.
        log_file = os.path.join(pair_path, '{}_lig0.log'.format(args.target))
        if os.path.exists(log_file):
            os.remove(log_file)

    elif args.task == 'conformer_check':
        process = get_prots(args.docked_prot_file)
        random.shuffle(process)
        run_conformer_check(process, args.raw_root)

    elif args.task == 'align_all':
        process = get_prots(args.docked_prot_file)
        random.shuffle(process)
        run_align_all(process, args.raw_root, args.run_path,
                      args.docked_prot_file, args.align_n)

    elif args.task == 'align_group':
        grouped_files = get_conformer_groups(args.align_n, args.target,
                                             args.start, args.protein,
                                             args.raw_root)
        # The parser defines --align_n; there is no ``n`` attribute on args.
        run_align_group(grouped_files, args.index, args.align_n, args.protein,
                        args.target, args.start, args.raw_root)

    elif args.task == 'align_check':
        process = get_prots(args.docked_prot_file)
        random.shuffle(process)
        run_align_check(process, args.raw_root)

    elif args.task == 'align_combine':
        process = get_prots(args.docked_prot_file)
        random.shuffle(process)
        run_align_combine(process, args.raw_root)

    elif args.task == 'run_search':
        process = get_prots(args.docked_prot_file)
        random.shuffle(process)
        grouped_files = get_grid_groups(args.grid_size, args.grid_n)
        search_system_caller(process, args.raw_root, args.run_path,
                             args.docked_prot_file,
                             args.rotation_search_step_size, args.grid_size,
                             grouped_files)

    elif args.task == 'search':
        grouped_files = get_grid_groups(args.grid_size, args.grid_n)
        run_search(args.protein, args.target, args.start, args.index,
                   args.raw_root, args.get_time, args.rmsd_cutoff,
                   args.rotation_search_step_size, grouped_files[args.index],
                   args.no_prot_h, args.pocket_only)

    elif args.task == 'check_search':
        # List the (pair, grid group) outputs that are still missing for the
        # first 10 pairs that have more than one conformer.
        process = get_prots(args.docked_prot_file)
        random.shuffle(process)
        grouped_files = get_grid_groups(args.grid_size, args.grid_n)
        counter = 0
        unfinished = []
        for protein, target, start in process:
            if counter == 10:
                break
            pair = '{}-to-{}'.format(target, start)
            protein_path = os.path.join(args.raw_root, protein)
            pair_path = os.path.join(protein_path, pair)
            conformer_file = os.path.join(pair_path,
                                          "{}_lig0-out.maegz".format(target))
            conformers = list(structure.StructureReader(conformer_file))
            if len(conformers) == 1:
                continue
            else:
                counter += 1
            save_folder = os.path.join(
                os.getcwd(), 'decoy_timing_data',
                '{}_{}-to-{}'.format(protein, target, start))
            for i in range(len(grouped_files)):
                if not os.path.exists(
                        os.path.join(save_folder, '{}.csv'.format(i))):
                    unfinished.append((protein, target, start, i))
        print("Missing:", len(unfinished))
        print(unfinished)

    elif args.task == 'test_search':
        run_test_search(args.protein, args.target, args.start, args.raw_root,
                        args.rmsd_cutoff, args.rotation_search_step_size,
                        pair_path, args.no_prot_h, args.pocket_only,
                        args.get_time)

    elif args.task == 'get_grid_data':
        process = get_prots(args.docked_prot_file)
        random.shuffle(process)
        grouped_files = get_grid_groups(args.grid_size, args.grid_n)
        get_data(process, grouped_files, args.raw_root, args.grid_size)

    elif args.task == 'combine_search_data':
        process = get_prots(args.docked_prot_file)
        random.shuffle(process)
        grouped_files = get_grid_groups(args.grid_size, args.grid_n)
        get_data(process, grouped_files, args.raw_root, args.grid_size, True)

    elif args.task == 'get_dist':
        # Print the centroid-to-centroid distance between start and target
        # ligand for the first 10 pairs that have more than one conformer.
        process = get_prots(args.docked_prot_file)
        random.shuffle(process)
        counter = 0
        for protein, target, start in process:
            if counter == 10:
                break
            pair = '{}-to-{}'.format(target, start)
            protein_path = os.path.join(args.raw_root, protein)
            pair_path = os.path.join(protein_path, pair)
            conformer_file = os.path.join(pair_path,
                                          "{}_lig0-out.maegz".format(target))
            conformers = list(structure.StructureReader(conformer_file))
            if len(conformers) == 1:
                continue
            else:
                counter += 1
            start_lig_file = os.path.join(pair_path,
                                          '{}_lig.mae'.format(start))
            start_lig = list(structure.StructureReader(start_lig_file))[0]
            start_lig_center = list(get_centroid(start_lig))
            target_lig_file = os.path.join(pair_path, 'ligand_poses',
                                           '{}_lig0.mae'.format(target))
            target_lig = list(structure.StructureReader(target_lig_file))[0]
            target_lig_center = list(get_centroid(target_lig))
            dist = math.sqrt(
                (start_lig_center[0] - target_lig_center[0])**2 +
                (start_lig_center[1] - target_lig_center[1])**2 +
                (start_lig_center[2] - target_lig_center[2])**2)
            print(protein, target, start, dist)

    elif args.task == 'test_rotate_translate':
        # Compare the local translate/rotate helpers with the ``transform``
        # module on the same protein and the same random transformation.
        prot_file = os.path.join(pair_path, '{}_prot.mae'.format(args.start))
        schrodinger_prot = list(structure.StructureReader(prot_file))[0]
        custom_prot = list(structure.StructureReader(prot_file))[0]
        translation_vector = np.random.uniform(low=-100, high=100, size=(3))
        transform.translate_structure(schrodinger_prot, translation_vector[0],
                                      translation_vector[1],
                                      translation_vector[2])
        translate_structure(custom_prot, translation_vector[0],
                            translation_vector[1], translation_vector[2])
        schrodinger_atoms = np.array(schrodinger_prot.getXYZ(copy=False))
        custom_atoms = np.array(custom_prot.getXYZ(copy=False))
        if np.array_equal(schrodinger_atoms, custom_atoms):
            print("Translate function works properly")
        else:
            print("Error in translate function")

        schrodinger_prot = list(structure.StructureReader(prot_file))[0]
        custom_prot = list(structure.StructureReader(prot_file))[0]
        rotation_vector = np.random.uniform(low=-2 * np.pi,
                                            high=2 * np.pi,
                                            size=(3))
        rotation_center = np.random.uniform(low=-100, high=100, size=(3))
        rotation_center = [
            rotation_center[0], rotation_center[1], rotation_center[2]
        ]
        transform.rotate_structure(schrodinger_prot, rotation_vector[0],
                                   rotation_vector[1], rotation_vector[2],
                                   rotation_center)
        coords = rotate_structure(custom_prot.getXYZ(copy=False),
                                  rotation_vector[0], rotation_vector[1],
                                  rotation_vector[2], rotation_center)
        custom_prot.setXYZ(coords)
        schrodinger_atoms = np.array(schrodinger_prot.getXYZ(copy=False))
        custom_atoms = np.array(custom_prot.getXYZ(copy=False))
        # Rotation is compared with a tolerance instead of exact equality.
        if np.amax(np.absolute(schrodinger_atoms - custom_atoms)) < 10**-7:
            print("Rotate function works properly")
        else:
            print("Error in rotate function")

    elif args.task == 'get_rmsd':
        conformer_file = os.path.join(
            pair_path, "aligned_to_start_without_hydrogen_conformers.mae")
        conformers = list(structure.StructureReader(conformer_file))

        target_lig_file = os.path.join(pair_path, 'ligand_poses',
                                       '{}_lig0.mae'.format(args.target))
        target_lig = list(structure.StructureReader(target_lig_file))[0]
        build.delete_hydrogens(target_lig)
        start_lig_file = os.path.join(pair_path,
                                      '{}_lig.mae'.format(args.start))
        start_lig = list(structure.StructureReader(start_lig_file))[0]
        start_lig_center = list(get_centroid(start_lig))

        # Move every conformer onto the start-ligand centroid, then score it
        # against the hydrogen-free target ligand.
        rmsds = []
        for i, conformer in tqdm(enumerate(conformers),
                                 desc='going through conformers'):
            conformer_center = list(get_centroid(conformer))
            translate_structure(conformer,
                                start_lig_center[0] - conformer_center[0],
                                start_lig_center[1] - conformer_center[1],
                                start_lig_center[2] - conformer_center[2])
            rmsds.append(
                (conformer,
                 rmsd.calculate_in_place_rmsd(conformer,
                                              conformer.getAtomIndices(),
                                              target_lig,
                                              target_lig.getAtomIndices()), i))

        # best_match_conformer = min(rmsds, key=lambda x: x[1])
        # print(best_match_conformer[1], best_match_conformer[2])
        # file = os.path.join(pair_path, 'best_match_conformer.mae')
        # with structure.StructureWriter(file) as best_match:
        #     best_match.append(best_match_conformer[0])
        # NOTE(review): conformer index 248 is hard-coded; this raises
        # IndexError when the file holds fewer than 249 conformers.
        print(rmsds[248][1], rmsds[248][2])
        file = os.path.join(pair_path, 'translated_conformer_248.mae')
        with structure.StructureWriter(file) as best_match:
            best_match.append(rmsds[248][0])

    elif args.task == 'check_rotation':
        target_lig_file = os.path.join(pair_path, 'ligand_poses',
                                       '{}_lig0.mae'.format(args.target))
        target_lig = list(structure.StructureReader(target_lig_file))[0]
        # Keep only atom 1 of the ligand.
        remove = [i for i in target_lig.getAtomIndices() if i != 1]
        target_lig.deleteAtoms(remove)
        center = list(get_centroid(target_lig))
        # NOTE(review): rotate_structure receives a structure here and its
        # return value is discarded, whereas the other call sites in this
        # function pass coordinates and store the result with setXYZ --
        # confirm that this branch still matches the helper's signature.
        print("ROTATE 5,5,5")
        rotate_structure(target_lig, math.radians(5), math.radians(5),
                         math.radians(5), center)

        target_lig_2 = list(structure.StructureReader(target_lig_file))[0]
        target_lig_2.deleteAtoms(remove)
        center = list(get_centroid(target_lig_2))
        print("ROTATE 5,0,0")
        rotate_structure(target_lig_2, math.radians(5), 0, 0, center)
        print("ROTATE 0,5,0")
        rotate_structure(target_lig_2, 0, math.radians(5), 0, center)
        print("ROTATE 0,0,5")
        rotate_structure(target_lig_2, 0, 0, math.radians(5), center)

        print(
            rmsd.calculate_in_place_rmsd(target_lig,
                                         target_lig.getAtomIndices(),
                                         target_lig_2,
                                         target_lig_2.getAtomIndices()))
        print(target_lig.getXYZ(copy=False))
        print(target_lig_2.getXYZ(copy=False))
def create_conformer_decoys(grid, target_lig, cutoff,
                            rotation_search_step_size, protein, target, start,
                            index, pair_path, test, x_rot, y_rot, z_rot):
    """Enumerate rotated conformer poses per grid location and log the results.

    For every location in ``grid`` the aligned conformers are read from
    ``pair_path``, translated by the grid offset and rotated about their
    centroid through every (x, y, z) angle combination from -30 to 30 degrees
    in steps of ``rotation_search_step_size``.  Each pose is scored by
    in-place RMSD against ``target_lig``; poses below ``cutoff`` count as
    correct.  One row per grid location is written to
    ``<cwd>/decoy_timing_data/<protein>_<target>-to-<start>/<index>.csv``.

    Args:
        grid: Iterable of ``[x, y, z]`` translation offsets.
        target_lig: Structure the poses are compared against.
        cutoff: RMSD threshold below which a pose counts as correct.
        rotation_search_step_size: Angle step in degrees.
        protein, target, start: Names recorded in the CSV and used for the
            output folder name.
        index: Grid group index; used as the CSV file name.
        pair_path: Directory holding the aligned conformer file.
        test: When true, return the conformer as soon as the rotation equals
            ``(x_rot, y_rot, z_rot)`` instead of running the full search.
        x_rot, y_rot, z_rot: Rotation (degrees) looked for in test mode.

    Returns:
        The posed conformer in test mode once the requested rotation is
        reached (no CSV is written in that case); otherwise ``None``.
    """
    data_dict = {
        'protein': [],
        'target': [],
        'start': [],
        'num_conformers': [],
        'num_poses_searched': [],
        'num_correct_poses_found': [],
        'time_elapsed': [],
        'time_elapsed_per_conformer': [],
        'grid_loc_x': [],
        'grid_loc_y': [],
        'grid_loc_z': []
    }
    conformer_file = os.path.join(
        pair_path, "aligned_to_start_without_hydrogen_conformers.mae")

    for grid_loc in grid:
        # Both tallies are per grid location, matching the one-row-per-
        # location layout of the CSV.
        num_poses_searched = 0
        num_correct_found = 0
        angles = range(-30, 30 + rotation_search_step_size,
                       rotation_search_step_size)
        # Conformers are moved in place below, so each grid location starts
        # from a fresh read of the file.
        conformers = list(structure.StructureReader(conformer_file))
        decoy_start_time = time.time()

        for conformer in conformers:
            transform.translate_structure(conformer, grid_loc[0], grid_loc[1],
                                          grid_loc[2])
            conformer_center = list(get_centroid(conformer))
            # Copy of the translated coordinates: every rotation starts from
            # this pose rather than from the previously rotated one.
            coords = conformer.getXYZ(copy=True)

            for x in angles:
                for y in angles:
                    for z in angles:
                        new_coords = rotate_structure(coords, math.radians(x),
                                                      math.radians(y),
                                                      math.radians(z),
                                                      conformer_center)
                        conformer.setXYZ(new_coords)

                        if test and x_rot == x and y_rot == y and z_rot == z:
                            return conformer

                        num_poses_searched += 1
                        rmsd_val = rmsd.calculate_in_place_rmsd(
                            conformer, conformer.getAtomIndices(), target_lig,
                            target_lig.getAtomIndices())
                        if rmsd_val < cutoff:
                            num_correct_found += 1

        elapsed = time.time() - decoy_start_time

        data_dict['protein'].append(protein)
        data_dict['target'].append(target)
        data_dict['start'].append(start)
        data_dict['num_conformers'].append(len(conformers))
        data_dict['num_poses_searched'].append(num_poses_searched)
        data_dict['num_correct_poses_found'].append(num_correct_found)
        data_dict['time_elapsed'].append(elapsed)
        # An empty conformer file must not abort the run with a division by
        # zero; report 0.0 for it instead.
        data_dict['time_elapsed_per_conformer'].append(
            elapsed / len(conformers) if conformers else 0.0)
        data_dict['grid_loc_x'].append(grid_loc[0])
        data_dict['grid_loc_y'].append(grid_loc[1])
        data_dict['grid_loc_z'].append(grid_loc[2])

    df = pd.DataFrame.from_dict(data_dict)
    save_folder = os.path.join(os.getcwd(), 'decoy_timing_data',
                               '{}_{}-to-{}'.format(protein, target, start))
    # Creates 'decoy_timing_data' and the per-pair folder in one call and is
    # safe when parallel jobs create them at the same time.
    os.makedirs(save_folder, exist_ok=True)
    df.to_csv(os.path.join(save_folder, '{}.csv'.format(index)))
    return None
def time_conformer_decoys(pair_path, start_lig_center, target_lig, prot,
                          rotation_search_step_size):
    """Print average timings of the building blocks of the pose search.

    Three passes are made over the aligned conformers in ``pair_path``, each
    starting from a fresh read of the file:

    1. translate and rotate with the ``transform`` module;
    2. translate and rotate with the local ``translate_structure`` /
       ``rotate_structure`` helpers;
    3. step through rotations and time ``steric_clash.clash_iterator``,
       ``steric_clash.clash_volume`` and ``rmsd.calculate_in_place_rmsd``
       for each pose, stopping after 1000 timed poses.

    Args:
        pair_path: Directory holding the aligned conformer file.
        start_lig_center: Coordinates the conformer centroids are moved to.
        target_lig: Structure used as the RMSD reference.
        prot: Structure the conformers are clash-checked against.
        rotation_search_step_size: Angle step in degrees.

    Returns:
        None; all results are printed.
    """
    conformer_file = os.path.join(
        pair_path, "aligned_to_start_without_hydrogen_conformers.mae")

    # Pass 1: translate / rotate from the ``transform`` module.
    translate_times = []
    rotate_times = []
    conformers = list(structure.StructureReader(conformer_file))

    for conformer in conformers:
        conformer_center = list(get_centroid(conformer))

        # translation
        grid_loc = [0, 0, 0]
        start = time.time()
        transform.translate_structure(
            conformer, start_lig_center[0] - conformer_center[0] + grid_loc[0],
            start_lig_center[1] - conformer_center[1] + grid_loc[1],
            start_lig_center[2] - conformer_center[2] + grid_loc[2])
        end = time.time()
        translate_times.append(end - start)

        # rotation
        start = time.time()
        transform.rotate_structure(
            conformer, math.radians(-30 - rotation_search_step_size), 0, 0,
            conformer_center)
        end = time.time()
        rotate_times.append(end - start)

    print("Average schrodinger translate time =",
          statistics.mean(translate_times))
    print("Average schrodinger rotate time =", statistics.mean(rotate_times))

    # Pass 2: the local translate / rotate helpers on freshly read conformers.
    # NOTE(review): rotate_structure is given a structure here and below; in
    # other functions of this file it is given coordinates and its return
    # value is stored with setXYZ -- confirm which signature is current.
    translate_times = []
    rotate_times = []
    conformers = list(structure.StructureReader(conformer_file))

    for conformer in conformers:
        conformer_center = list(get_centroid(conformer))

        # translation
        grid_loc = [0, 0, 0]
        start = time.time()
        translate_structure(
            conformer, start_lig_center[0] - conformer_center[0] + grid_loc[0],
            start_lig_center[1] - conformer_center[1] + grid_loc[1],
            start_lig_center[2] - conformer_center[2] + grid_loc[2])
        end = time.time()
        translate_times.append(end - start)

        # rotation
        start = time.time()
        rotate_structure(conformer,
                         math.radians(-30 - rotation_search_step_size), 0, 0,
                         conformer_center)
        end = time.time()
        rotate_times.append(end - start)

    print("Average custom translate time =", statistics.mean(translate_times))
    print("Average custom rotate time =", statistics.mean(rotate_times))

    # Pass 3: per-pose clash and RMSD timings.
    clash_iterator_times = []
    clash_volume_times = []
    rmsd_times = []
    rotation_search_step_size_rad = math.radians(rotation_search_step_size)

    def report_clash_and_rmsd_times():
        # Print the averages of whatever has been timed so far.
        print("Average clash iterator time =",
              statistics.mean(clash_iterator_times))
        print("Average clash volume time =",
              statistics.mean(clash_volume_times))
        print("Average rmsd time =", statistics.mean(rmsd_times))

    conformers = list(structure.StructureReader(conformer_file))
    for conformer in conformers:
        conformer_center = list(get_centroid(conformer))

        # translation
        grid_loc = [0, 0, 0]
        translate_structure(
            conformer, start_lig_center[0] - conformer_center[0] + grid_loc[0],
            start_lig_center[1] - conformer_center[1] + grid_loc[1],
            start_lig_center[2] - conformer_center[2] + grid_loc[2])
        conformer_center = list(get_centroid(conformer))

        # keep track of rotation angles
        rotate_structure(conformer,
                         math.radians(-30 - rotation_search_step_size), 0, 0,
                         conformer_center)
        x_so_far = -30 - rotation_search_step_size
        y_so_far = 0
        z_so_far = 0

        for _ in range(-30, 30, rotation_search_step_size):
            # x rotation: advance x by one step and rewind y to one step
            # before the start of its range
            rotate_structure(
                conformer, rotation_search_step_size_rad,
                math.radians(-30 - rotation_search_step_size - y_so_far), 0,
                conformer_center)
            x_so_far += 1
            y_so_far += -30 - rotation_search_step_size - y_so_far

            for _ in range(-30, 30, rotation_search_step_size):
                # y rotation: advance y by one step and rewind z likewise
                rotate_structure(
                    conformer, 0, rotation_search_step_size_rad,
                    math.radians(-30 - rotation_search_step_size - z_so_far),
                    conformer_center)
                y_so_far += 1
                z_so_far += -30 - rotation_search_step_size - z_so_far

                for _ in range(-30, 30, rotation_search_step_size):
                    # z rotation
                    rotate_structure(conformer, 0, 0,
                                     rotation_search_step_size_rad,
                                     conformer_center)
                    z_so_far += 1

                    # get clash_iterator
                    start = time.time()
                    # ``default`` keeps a clash-free pose (empty iterator)
                    # from aborting the benchmark with ValueError.
                    max([
                        x[2] for x in list(
                            steric_clash.clash_iterator(prot,
                                                        struc2=conformer))
                    ], default=None)
                    end = time.time()
                    clash_iterator_times.append(end - start)

                    # get clash_volume
                    start = time.time()
                    steric_clash.clash_volume(prot, struc2=conformer)
                    end = time.time()
                    clash_volume_times.append(end - start)

                    # get rmsd
                    start = time.time()
                    rmsd.calculate_in_place_rmsd(conformer,
                                                 conformer.getAtomIndices(),
                                                 target_lig,
                                                 target_lig.getAtomIndices())
                    end = time.time()
                    rmsd_times.append(end - start)

                    # 1000 timed poses are enough for a stable average.
                    if len(clash_iterator_times) == 1000:
                        report_clash_and_rmsd_times()
                        return

    # Fewer than 1000 poses were available: still report what was measured
    # instead of returning silently.
    if clash_iterator_times:
        report_clash_and_rmsd_times()
Esempio n. 6
0
                lig,
                'C1NNC(C12)CCNC2')  #getting the core indices of the ligand
            for index in indices:  #because the nested list gives the rmsd calculation big problems......
                lig_core = index

            xtal_lig = analyze.evaluate_asl(
                xtal, '(res.ptype "BC7 ")'
            )  #getting the indices of the coxtal ligand after superposition
            xtal_lig_struct = (struct._AtomCollection(
                xtal, xtal_lig)).extractStructure(
                )  #extracting the coxtal ligand as a separate structure
            indices_2 = analyze.evaluate_smarts_canvas(
                xtal_lig_struct,
                'c1nnc(c12)CCNC2')  #getting core indices of coxtal ligand
            for index in indices_2:
                xtal_core = index

            core_rmsd = rmsd.calculate_in_place_rmsd(
                xtal_lig_struct, xtal_core, lig,
                lig_core)  #calculating rmsd of cores

            dataline = [ligand, cen_num, score, core_rmsd]
            data.append(dataline)

        data.sort(key=lambda x: x[2])
        print(data[0])
        for line in data:
            newline = line[0] + ',' + str(line[1]) + ',' + str(
                line[2]) + ',' + str(line[3]) + '\n'
            datafile.write(newline)
Esempio n. 7
0
def compute_protein_rmsds(paired_strs, protein, start, target, s1, s2, chains,
                          protein_folder):
    """Compute per-residue RMSDs between matching pocket residues of two structures.

    ``s1`` is treated as the structure of the ``start`` ligand and ``s2`` as
    the structure of the ``target`` ligand (their pocket files are
    ``<start>_pocket.mae`` and ``<target>_pocket.mae`` in ``protein_folder``).
    Residues near either ligand are kept when both aligned sequences carry
    the same character at the residue's alignment position; the partner
    residue from the other structure is added alongside.

    Args:
        paired_strs: Nested mapping ``[protein][start][target]`` giving the
            pair of aligned sequence strings for ``s1`` and ``s2``.
        protein: Protein name (lookup key and diagnostic label).
        start: Name of the ligand associated with ``s1``.
        target: Name of the ligand associated with ``s2``.
        s1: First structure.
        s2: Second structure.
        chains: Passed through to ``get_all_res``.
        protein_folder: Directory containing the pocket files.

    Returns:
        List of in-place RMSD values, one per residue pair whose atom lists
        have equal length.  ``None`` (after printing a diagnostic) when
        either structure has no residues near its ligand or
        ``get_res_near_ligand`` reports 0 for it.
    """
    (paired_str_s1, paired_str_s2) = paired_strs[protein][start][target]

    r_list_s1 = get_all_res(s1, chains, protein)
    r_list_s2 = get_all_res(s2, chains, protein)

    # Residue -> alignment index and the inverse, for both structures.
    r_to_i_map_s1 = map_residues_to_align_index(paired_str_s1, r_list_s1)
    r_to_i_map_s2 = map_residues_to_align_index(paired_str_s2, r_list_s2)
    i_to_r_map_s1 = inv_map(r_to_i_map_s1)
    i_to_r_map_s2 = inv_map(r_to_i_map_s2)

    pocket_file_s1 = os.path.join(protein_folder, start + '_pocket.mae')
    pocket_file_s2 = os.path.join(protein_folder, target + '_pocket.mae')
    pocket_s1 = list(structure.StructureReader(pocket_file_s1))[0]
    pocket_s2 = list(structure.StructureReader(pocket_file_s2))[0]
    valid_r_s1 = get_res_near_ligand(r_to_i_map_s1, pocket_s1)
    valid_r_s2 = get_res_near_ligand(r_to_i_map_s2, pocket_s2)

    # Each diagnostic names the ligand whose pocket was actually inspected:
    # ``start`` for s1, ``target`` for s2.
    for valid_r, ligand in ((valid_r_s1, start), (valid_r_s2, target)):
        if valid_r == set():
            print(protein, ligand, "no residues close to the ligand")
            return

        if valid_r == 0:
            print(protein, ligand, "pose viewer file has no ligand")
            return

    print("Calculating")
    final_r_list_s1 = []
    final_r_list_s2 = []

    # Residues near the s1 ligand, plus their alignment partners in s2.
    for r in valid_r_s1:
        s1index = r_to_i_map_s1[r]

        if paired_str_s1[s1index] == paired_str_s2[s1index]:
            if r not in final_r_list_s1:
                final_r_list_s1.append(r)

            if i_to_r_map_s2[s1index] not in final_r_list_s2:
                final_r_list_s2.append(i_to_r_map_s2[s1index])

    # Residues near the s2 ligand, plus their alignment partners in s1.
    for r in valid_r_s2:
        s2index = r_to_i_map_s2[r]

        if paired_str_s2[s2index] == paired_str_s1[s2index]:
            if r not in final_r_list_s2:
                final_r_list_s2.append(r)

            if i_to_r_map_s1[s2index] not in final_r_list_s1:
                final_r_list_s1.append(i_to_r_map_s1[s2index])

    # Only the atom lists are needed; the first element of each result is
    # not used here.
    (_, a_list_s1) = get_atoms(s1, final_r_list_s1)
    (_, a_list_s2) = get_atoms(s2, final_r_list_s2)
    rmsd_ls = []

    for k, atoms_s1 in enumerate(a_list_s1):
        atoms_s2 = a_list_s2[k]
        # Residue pairs with a different number of atoms are skipped.
        if len(atoms_s1) == len(atoms_s2):
            rmsd_ls.append(
                rmsd.calculate_in_place_rmsd(s1, atoms_s1, s2, atoms_s2))

    return rmsd_ls
Esempio n. 8
0
def _interaction_index_pairs(interactions):
    """Return ``(index, index)`` tuples for the first two atoms of each entry."""
    return [(pair[0].index, pair[1].index) for pair in interactions]


def _overlap_ratios(pairs_per_struct):
    """Score every pair list against the first one with ``difflib``.

    Returns one ``SequenceMatcher.ratio()`` value per entry, rounded to five
    decimals; the first entry is compared with itself.
    """
    reference = pairs_per_struct[0]
    return [
        round(difflib.SequenceMatcher(None, reference, pairs).ratio(), 5)
        for pairs in pairs_per_struct
    ]


def rmsdRef():
    """Print an RMSD / interaction-overlap table relative to the best model.

    Reads ``v92.finalResult`` (first line skipped; per row, column 0 is the
    seed, column 8 the energy and column 9 a label that is embedded in the
    structure file name) and ``v92.con`` (the third token of the last line
    containing ``subjob_control`` becomes the file-name pattern), both from
    the current working directory.

    For every seed, in order of increasing energy, each directory in
    ``subJobs`` whose name starts with ``<seed>_`` and that contains
    ``plop.stdout`` contributes one structure.  A copy of the first loaded
    structure is the reference: every structure is superimposed with it on
    the ``NONLOOPINDICES_asl`` atoms and the in-place RMSD over the
    ``LOOPENVINDICES_asl`` atoms is reported, together with the similarity of
    its hydrogen-bond and salt-bridge atom pairs to those of the first
    structure.

    The seed and energy printed on each row are the ones recorded when that
    structure was loaded, so rows stay correct even when some seeds have no
    usable job directory (or more than one).

    Returns:
        None.  The table is written to stdout.
    """
    with open('v92.finalResult', 'r') as f:
        # The first line is dropped (presumably a column header).
        result_lines = f.readlines()[1:]

    records = []
    for line in result_lines:
        terms = line.split()
        if not terms:
            # Tolerate blank (e.g. trailing) lines.
            continue
        records.append((float(terms[8]), int(terms[0]), terms[9]))
    # Tuples sort by energy first, then seed, then label.
    records.sort()

    pattern = None
    with open('v92.con', 'r') as f:
        for line in f:
            if 'subjob_control' in line:
                pattern = line.split()[2]

    sub_jobs_root = os.path.join(os.getcwd(), 'subJobs')
    job_dirs = os.listdir(sub_jobs_root)

    structs = []
    struct_seeds = []
    struct_energies = []
    for energy, seed, native_rmsd in records:
        seed_prefix = str(seed)
        for job_dir in job_dirs:
            if job_dir.split('_')[0] != seed_prefix:
                continue
            job_path = os.path.join(sub_jobs_root, job_dir)
            if not os.path.exists(os.path.join(job_path, 'plop.stdout')):
                continue
            stName = '4KUZ-p' + str(
                pattern) + '-' + native_rmsd + '_template.maegz'
            # Full paths are used instead of os.chdir() so a failed read
            # cannot leave the process in a different working directory.
            structs.append(
                next(structure.StructureReader(os.path.join(job_path,
                                                            stName))))
            struct_seeds.append(seed)
            struct_energies.append(energy)

    if not structs:
        print('No structures found under subJobs; nothing to compare')
        return

    minStruct = copy.deepcopy(structs[0])
    LOOPENVINDICES = analyze.evaluate_asl(minStruct, LOOPENVINDICES_asl)
    NONLOOPINDICES = analyze.evaluate_asl(minStruct, NONLOOPINDICES_asl)

    rmsds = []
    for curStruct in structs:
        rmsd.superimpose(minStruct, NONLOOPINDICES, curStruct, NONLOOPINDICES)
        rmsds.append(
            rmsd.calculate_in_place_rmsd(minStruct, LOOPENVINDICES, curStruct,
                                         LOOPENVINDICES))

    # What about Hbond patterns?
    hbond_overlaps = _overlap_ratios([
        _interaction_index_pairs(
            hbond.get_hydrogen_bonds(curStruct, LOOPENVINDICES))
        for curStruct in structs
    ])

    # What about salt bridge interactions?
    salt_bridge_overlaps = _overlap_ratios([
        _interaction_index_pairs(
            salt_bridge.get_salt_bridges(curStruct, LOOPENVINDICES))
        for curStruct in structs
    ])

    # Hydrophobic interactions

    print('SEED\t\tRMSD\t\tHBOND_OVERLAP\tSALTBR_OVERLAP\tENERGY')
    rows = zip(struct_seeds, rmsds, hbond_overlaps, salt_bridge_overlaps,
               struct_energies)
    for seed, rmsd_val, hb_overlap, sb_overlap, energy in rows:
        print(
            str(seed) + '\t\t' + str(round(rmsd_val, 3)) + '\t\t' +
            str(hb_overlap * 100) + '\t\t' + str(sb_overlap * 100) + '\t\t' +
            str(energy))
def _collect_aligned_residues(valid_residues, r_to_i_map, i_to_r_map_other,
                              paired_str, paired_str_other, final_own,
                              final_other):
    """Append residues whose aligned characters match, plus their partners.

    For each residue in ``valid_residues`` the alignment index is looked up;
    when both alignment strings hold the same character at that index, the
    residue is added to ``final_own`` and the residue of the other structure
    at the same index to ``final_other`` (each only if not already present).
    Both lists are modified in place.
    """
    for r in valid_residues:
        align_index = r_to_i_map[r]
        if paired_str[align_index] != paired_str_other[align_index]:
            continue
        if r not in final_own:
            final_own.append(r)
        partner = i_to_r_map_other[align_index]
        if partner not in final_other:
            final_other.append(partner)


def compute_protein_rmsds(protein, rmsd_file, combind_root):
    """Write per-residue RMSDs between every ordered ligand pair to a CSV.

    For each ``start`` ligand that has a feature pickle, and each different
    ``target`` ligand, the residues near either ligand whose alignment
    characters agree are collected, and one CSV row is written per residue
    with the stored feature values, the ligand similarity metrics and the
    complete / backbone / sidechain in-place RMSD between the two structures.

    Args:
        protein: Protein name; used in the input paths and in every row.
        rmsd_file: Path of the CSV file to create (overwritten if present).
        combind_root: Prefix that is string-concatenated (not path-joined)
            with ``'<protein>/structures/aligned_files/...'``, so it must end
            with a path separator.

    Notes:
        ``max_ligands`` is read from module scope.  The pickle files are
        loaded with ``pickle.load``, which can execute arbitrary code; only
        use trusted, locally generated files.

    Returns:
        None.
    """
    mcss_data = pd.read_csv(
        "../../similarity/Data/mcss/{}_mcss.csv".format(protein))

    # newline='' is what the csv module requires so that the writer's own
    # '\r\n' terminators are not translated again on Windows.
    with open(rmsd_file, 'w', newline='') as csvFile:
        writer = csv.writer(csvFile)
        writer.writerow([
            'protein', 'start ligand', 'target ligand', 'name', 'num',
            'bfactor', 'normalized bfactor', 'prev prev bfactor',
            'prev bfactor', 'next bfactor', 'next next bfactor', 'mol weight',
            'general number of rotamers', 'general avg rmsd of rotamers',
            'specific number of rotamers', 'specific avg rmsd of rotamers',
            'packing', 'solvent accessibility', 'secondary structure',
            'ligand similarity', 'ligand similarity ratio',
            'ligand size difference', 'ligand size ratio', 'complete rmsd',
            'backbone rmsd', 'sidechain rmsd'
        ])

        ligands = get_ligands(protein, max_ligands, combind_root)
        alignment_path = (
            '../../protein_flexibility/Data/alignments/{}_alignment.pkl'.
            format(protein))
        with open(alignment_path, 'rb') as infile:
            paired_strs = pickle.load(infile)

        for start in ligands:
            ASL_to_feature_path = '../Data/feature_vectors_packing/' + protein + '/' + start + '.pkl'

            if not os.path.exists(ASL_to_feature_path):
                # Report the missing feature file and skip this ligand.
                print(ASL_to_feature_path)
                continue

            with open(ASL_to_feature_path, 'rb') as infile:
                ASL_to_feature = pickle.load(infile)
            print('Start', start)
            ending_1 = '{}/structures/aligned_files/{}/{}_out.mae'.format(
                protein, start, start)
            s1 = list(structure.StructureReader(combind_root + ending_1))[0]

            for target in ligands:
                if start == target:
                    continue

                ending_2 = '{}/structures/aligned_files/{}/{}_out.mae'.format(
                    protein, target, target)
                s2 = list(
                    structure.StructureReader(combind_root + ending_2))[0]

                # Alignments are stored once per pair, keyed by the smaller
                # ligand name first.
                if start < target:
                    (paired_str_s1,
                     paired_str_s2) = paired_strs[start][target]
                else:
                    (paired_str_s2,
                     paired_str_s1) = paired_strs[target][start]

                (ligSim, ligSimRatio, ligSizeDiff,
                 ligSizeRatio) = ligSimMetrics(start, target, mcss_data)

                r_list_s1 = get_all_res(s1)
                r_list_s2 = get_all_res(s2)

                r_to_i_map_s1 = map_residues_to_align_index(
                    paired_str_s1, r_list_s1)
                r_to_i_map_s2 = map_residues_to_align_index(
                    paired_str_s2, r_list_s2)
                i_to_r_map_s1 = inv_map(r_to_i_map_s1)
                i_to_r_map_s2 = inv_map(r_to_i_map_s2)

                valid_r_s1 = get_res_near_ligand(s1, r_to_i_map_s1)
                valid_r_s2 = get_res_near_ligand(s2, r_to_i_map_s2)

                # s1 belongs to `start` and s2 to `target`; each message
                # names the ligand whose structure caused the skip.
                if valid_r_s1 == set():
                    print(protein, start, "no residues close to the ligand")
                    continue

                if valid_r_s1 == 0:
                    print(protein, start, "pose viewer file has no ligand")
                    continue

                if valid_r_s2 == set():
                    print(protein, target, "no residues close to the ligand")
                    continue

                if valid_r_s2 == 0:
                    print(protein, target, "pose viewer file has no ligand")
                    continue

                final_r_list_s1 = []
                final_r_list_s2 = []
                _collect_aligned_residues(valid_r_s1, r_to_i_map_s1,
                                          i_to_r_map_s2, paired_str_s1,
                                          paired_str_s2, final_r_list_s1,
                                          final_r_list_s2)
                _collect_aligned_residues(valid_r_s2, r_to_i_map_s2,
                                          i_to_r_map_s1, paired_str_s2,
                                          paired_str_s1, final_r_list_s2,
                                          final_r_list_s1)

                (asl_list_s1, a_list_s1, backbone_a_list_s1,
                 sidechain_a_list_s1) = get_atoms(s1, final_r_list_s1)
                (asl_list_s2, a_list_s2, backbone_a_list_s2,
                 sidechain_a_list_s2) = get_atoms(s2, final_r_list_s2)

                for k in range(len(a_list_s1)):
                    # Residues with differing atom counts cannot be compared
                    # atom-by-atom and are skipped.
                    if len(a_list_s1[k]) != len(a_list_s2[k]):
                        continue

                    rmsd_val = rmsd.calculate_in_place_rmsd(
                        s1, a_list_s1[k], s2, a_list_s2[k])
                    backbone_rmsd_val = rmsd.calculate_in_place_rmsd(
                        s1, backbone_a_list_s1[k], s2, backbone_a_list_s2[k])
                    sidechain_rmsd_val = rmsd.calculate_in_place_rmsd(
                        s1, sidechain_a_list_s1[k], s2,
                        sidechain_a_list_s2[k])
                    feature = ASL_to_feature[asl_list_s1[k]]
                    # The first 16 feature entries fill the 'name' through
                    # 'secondary structure' columns of the header.
                    feature_values = [feature[i] for i in range(16)]
                    writer.writerow(
                        [protein, start, target] + feature_values + [
                            ligSim, ligSimRatio, ligSizeDiff, ligSizeRatio,
                            rmsd_val, backbone_rmsd_val, sidechain_rmsd_val
                        ])
def create_conformer_decoys(save_path, run_path, conformers, grid,
                            num_jobs_submitted, start_lig_center, target_lig,
                            prot, min_angle, max_angle, rmsd_cutoff, protein,
                            target, start, index):
    """Randomly place conformers until one lands within ``rmsd_cutoff``.

    Each iteration draws a random conformer, a random grid entry and a
    rotation, applies them to the conformer, and accepts the pose when its
    clash volume against ``prot`` is below 200 and its in-place RMSD to
    ``target_lig`` is below ``rmsd_cutoff``.  Grid entries, conformers and
    rotations carry a counter; after five consecutive iterations without a
    low-clash pose the candidate with the largest counter is discarded,
    first from ``grid``, then (once one grid entry is left) from the
    conformer and rotation lists.

    Args:
        save_path: Directory for the summary file
            ``<protein>_<target>_<start>.txt`` (written only if absent).
        run_path: Directory for the progress/log file
            ``<protein>_<target>_<start>_<index>.txt`` (appended to).
        conformers: Iterable of conformer structures to sample from.
        grid: List of ``[location, counter]`` entries; ``location`` is
            indexed with 0..2.  The list and its counters are modified in
            place.
        num_jobs_submitted: Value compared with ``get_jobs_in_queue(...)``
            every 1000 iterations; the search stops when they differ.
        start_lig_center: Sequence indexed 0..2 used as translation target.
        target_lig: Structure the pose RMSD is measured against.
        prot: Structure passed to ``steric_clash.clash_volume``.
        min_angle, max_angle: Rotation bounds; ``math.degrees`` is applied
            to them, so they are treated as radians.
        rmsd_cutoff: RMSD below which a pose counts as correct.
        protein, target, start: Strings used in file names and in the job
            name prefix (first character of each).
        index: Used only in the log file name; the name is rebound inside
            the loop.
    """
    # Each conformer is paired with a counter, starting at 0.
    conformer_ls = [[c, 0] for c in conformers]

    # Enumerate every whole-degree (x, y, z) rotation between the bounds,
    # stored in radians together with a counter.
    rot_ls = []
    for rot_x in range(int(math.degrees(min_angle)),
                       int(math.degrees(max_angle)) + 1):
        for rot_y in range(int(math.degrees(min_angle)),
                           int(math.degrees(max_angle)) + 1):
            for rot_z in range(int(math.degrees(min_angle)),
                               int(math.degrees(max_angle)) + 1):
                rot_ls.append([[
                    math.radians(rot_x),
                    math.radians(rot_y),
                    math.radians(rot_z)
                ], 0])

    output_file = os.path.join(
        run_path, '{}_{}_{}_{}.txt'.format(protein, target, start, index))
    num_iter_without_pose = 0
    num_valid_poses = 0
    num_total_poses = 0

    while True:
        num_iter_without_pose += 1
        num_total_poses += 1
        # Every 1000 poses: log progress and stop if the queue count no
        # longer equals num_jobs_submitted.
        # NOTE(review): get_jobs_in_queue presumably counts queued jobs whose
        # name matches the three-letter prefix -- confirm against its
        # definition.
        if num_total_poses % 1000 == 0:
            num_jobs_in_queue = get_jobs_in_queue('{}{}{}'.format(
                protein[0], target[0], start[0]))
            f = open(output_file, "a")
            f.write(
                "num_total_poses: {}, len(grid): {}, len(conformer_ls): {}, len(rot_ls): {}, num_jobs_in_queue: "
                "{}\n".format(num_total_poses, len(grid), len(conformer_ls),
                              len(rot_ls), num_jobs_in_queue))
            f.close()
            if num_jobs_in_queue != num_jobs_submitted:
                break
        conformer_index = random.randint(0, len(conformer_ls) - 1)
        conformer = conformer_ls[conformer_index][0]
        conformer_center = list(get_centroid(conformer))

        # translation
        # NOTE: this rebinds the `index` parameter; output_file was already
        # built from the caller's value above.
        index = random.randint(0, len(grid) - 1)
        grid_loc = grid[index][0]
        # Shift by (start_lig_center - conformer_center + grid_loc) per axis.
        transform.translate_structure(
            conformer, start_lig_center[0] - conformer_center[0] + grid_loc[0],
            start_lig_center[1] - conformer_center[1] + grid_loc[1],
            start_lig_center[2] - conformer_center[2] + grid_loc[2])
        conformer_center = list(get_centroid(conformer))

        # rotation
        # While several grid entries remain, angles are sampled continuously;
        # once a single entry is left, they come from the enumerated rot_ls
        # so that individual rotations can be scored and pruned.
        if len(grid) > 1:
            x_angle = np.random.uniform(min_angle, max_angle)
            y_angle = np.random.uniform(min_angle, max_angle)
            z_angle = np.random.uniform(min_angle, max_angle)
        else:
            rot_index = random.randint(0, len(rot_ls) - 1)
            x_angle, y_angle, z_angle = rot_ls[rot_index][0]
        # NOTE(review): the transform return values are not captured, so the
        # conformer is presumably modified in place and rotations accumulate
        # across iterations that draw the same conformer -- confirm intended.
        transform.rotate_structure(conformer, x_angle, y_angle, z_angle,
                                   conformer_center)

        if steric_clash.clash_volume(prot, struc2=conformer) < 200:
            num_valid_poses += 1
            if rmsd.calculate_in_place_rmsd(
                    conformer, conformer.getAtomIndices(), target_lig,
                    target_lig.getAtomIndices()) < rmsd_cutoff:
                # Correct pose found: record the counts and stop.
                save_file = os.path.join(
                    save_path, '{}_{}_{}.txt'.format(protein, target, start))
                f = open(output_file, "a")
                f.write("Num poses searched = {}\n".format(num_total_poses))
                f.write("Num acceptable clash poses searched = {}\n".format(
                    num_valid_poses))
                f.close()
                # An existing summary file is never overwritten.
                if not os.path.exists(save_file):
                    with open(save_file, 'w') as f:
                        f.write("Num poses searched = {}\n".format(
                            num_total_poses))
                        f.write("Num acceptable clash poses searched = {}\n".
                                format(num_valid_poses))
                break
            # Low-clash pose at this grid entry: reset its counter and the
            # no-pose streak.
            grid[index][1] = 0
            num_iter_without_pose = 0
        elif num_iter_without_pose == 5 and len(grid) > 1:
            # Five misses in a row: drop the grid entry with the largest
            # counter.
            max_val = max(grid, key=lambda x: x[1])
            grid.remove(max_val)
            num_iter_without_pose = 0
        elif num_iter_without_pose == 5 and len(grid) == 1:
            if len(conformer_ls) == 1 and len(rot_ls) == 1:
                # Nothing left to prune: report failure and stop.
                save_file = os.path.join(
                    save_path, '{}_{}_{}.txt'.format(protein, target, start))
                f = open(output_file, "a")
                f.write("Num poses searched = {}\n".format(num_total_poses))
                f.write("Num acceptable clash poses searched = {}\n".format(
                    num_valid_poses))
                f.write("No correct poses found\n")
                f.close()
                if not os.path.exists(save_file):
                    with open(save_file, 'w') as f:
                        f.write("Num poses searched = {}\n".format(
                            num_total_poses))
                        f.write("Num acceptable clash poses searched = {}\n".
                                format(num_valid_poses))
                        f.write("No correct poses found\n")
                break
            elif len(conformer_ls) > 1 and (len(rot_ls) == 1 or
                                            (len(conformer_ls) + len(rot_ls)) %
                                            2 == 0):
                # Drop a conformer when rotations are exhausted or the
                # combined list length is even; otherwise drop a rotation.
                max_val = max(conformer_ls, key=lambda x: x[1])
                conformer_ls.remove(max_val)
            else:
                max_val = max(rot_ls, key=lambda x: x[1])
                rot_ls.remove(max_val)
            num_iter_without_pose = 0
        else:
            # Miss (clash volume >= 200) below the pruning threshold: bump
            # the counters of the candidates that produced it.
            grid[index][1] += 1
            conformer_ls[conformer_index][1] += 1
            if len(grid) == 1:
                # rot_index is only assigned when a single grid entry remains.
                rot_ls[rot_index][1] += 1