def create_decoys(lig_file):
    """
    creates MAX_DECOYS number of translated/rotated decoys

    Each decoy is produced by re-reading the first structure in lig_file,
    translating it along a vector from random_three_vector() scaled by a
    distance drawn from N(MEAN_TRANSLATION, STDEV_TRANSLATION), and then
    rotating it about its own centroid by three angles drawn uniformly from
    [MIN_ANGLE, MAX_ANGLE).

    :param lig_file: (string) file of glide ligand pose that will be
        translated/rotated; the last four characters are stripped to build
        the decoy names, so a '.mae' suffix is assumed
    :return: None. Decoys are written next to lig_file as
        <lig_file without suffix><letter>.mae, with letters starting at 'a'
    """
    # Hoisted: the original recomputed this basename three times.
    lig_name = lig_file.split('/')[-1]
    code = lig_name.split('_')[-1]

    # NOTE(review): modify_file is defined elsewhere; it appears to tag the
    # file with the given label -- confirm against its definition.
    if code == 'lig0.mae':
        modify_file(lig_file, '_pro_ligand')
    else:
        modify_file(lig_file, '_ligand')

    for i in range(MAX_DECOYS):
        # Re-read from disk every iteration so each decoy starts from the
        # unmodified pose (the transforms below mutate the structure).
        s = list(structure.StructureReader(lig_file))[0]

        # translation
        x, y, z = random_three_vector()
        dist = np.random.normal(MEAN_TRANSLATION, STDEV_TRANSLATION)
        transform.translate_structure(s, x * dist, y * dist, z * dist)

        # rotation
        x_angle = np.random.uniform(MIN_ANGLE, MAX_ANGLE)
        y_angle = np.random.uniform(MIN_ANGLE, MAX_ANGLE)
        z_angle = np.random.uniform(MIN_ANGLE, MAX_ANGLE)
        rot_center = list(get_centroid(s))
        transform.rotate_structure(s, x_angle, y_angle, z_angle, rot_center)

        # chr(ord('a') + i) only yields letters for i < 26.
        decoy_file = lig_file[:-4] + chr(ord('a') + i) + '.mae'
        with structure.StructureWriter(decoy_file) as decoy:
            decoy.append(s)

        # The original branched on `code` here, but both branches made this
        # exact call, so the conditional was dead weight.
        modify_file(decoy_file, lig_name)
def create_conformer_decoys(conformers, grid_size, start_lig_center, prot,
                            pose_path, target, max_poses, min_angle,
                            max_angle):
    """
    Write randomly placed conformer poses until max_poses - 1 decoys exist.

    A conformer is picked at random, moved so that its centroid sits on
    start_lig_center plus a randomly chosen integer grid offset, and rotated
    about its new centroid by three angles drawn uniformly from
    [min_angle, max_angle). Poses whose clash volume against prot is below
    200 are written to pose_path as "<target>_lig<k>.mae" with k counting up
    from 1. Grid cells that keep failing are pruned: after five consecutive
    misses the cell with the highest miss count is removed (while more than
    one cell remains).

    NOTE(review): another function with this same name is defined further
    down in this file and would replace this one at import time -- confirm
    which definition callers are meant to reach.

    :param conformers: sequence of structures to sample from (mutated in place)
    :param grid_size: (int) offsets range over [-grid_size, grid_size) per axis
    :param start_lig_center: indexable with at least three coordinates
    :param prot: structure passed to steric_clash.clash_volume
    :param pose_path: (string) directory the decoy files are written to
    :param target: (string) target ligand name used in the file names
    :param max_poses: (int) loop runs while the next pose number is below this
    :param min_angle: lower bound of the uniform rotation-angle draw
    :param max_angle: upper bound of the uniform rotation-angle draw
    :return: None
    """
    axis_offsets = range(-grid_size, grid_size)
    # Each entry is [offset, miss_count].
    grid = [[[dx, dy, dz], 0]
            for dx in axis_offsets
            for dy in axis_offsets
            for dz in axis_offsets]

    misses_in_a_row = 0
    next_pose_number = 1

    while next_pose_number < max_poses:
        misses_in_a_row += 1
        candidate = random.choice(conformers)
        center = list(get_centroid(candidate))

        # translation
        cell = random.randint(0, len(grid) - 1)
        offset = grid[cell][0]
        shift_x = start_lig_center[0] - center[0] + offset[0]
        shift_y = start_lig_center[1] - center[1] + offset[1]
        shift_z = start_lig_center[2] - center[2] + offset[2]
        transform.translate_structure(candidate, shift_x, shift_y, shift_z)
        center = list(get_centroid(candidate))

        # rotation (three draws, in x, y, z order)
        x_angle, y_angle, z_angle = (
            np.random.uniform(min_angle, max_angle) for _ in range(3))
        transform.rotate_structure(candidate, x_angle, y_angle, z_angle,
                                   center)

        clash_ok = steric_clash.clash_volume(prot, struc2=candidate) < 200
        if clash_ok:
            decoy_name = "{}_lig{}.mae".format(target, next_pose_number)
            decoy_file = os.path.join(pose_path, decoy_name)
            with structure.StructureWriter(decoy_file) as writer:
                writer.append(candidate)
            modify_file(decoy_file, '_pro_ligand')
            modify_file(decoy_file, '{}_lig0.mae'.format(target))
            next_pose_number += 1
            grid[cell][1] = 0
            misses_in_a_row = 0
        elif misses_in_a_row == 5 and len(grid) > 1:
            # Drop the cell that has failed most often.
            worst_cell = max(grid, key=lambda entry: entry[1])
            grid.remove(worst_cell)
            misses_in_a_row = 0
        else:
            grid[cell][1] += 1
def main():
    """
    Command-line entry point: parse arguments and dispatch on `task`.

    Positional arguments are task, docked_prot_file, run_path and raw_root.
    Recognised task values are: conformer_all, conformer_group,
    conformer_check, align_all, align_group, align_check, align_combine,
    run_search, search, check_search, test_search, get_grid_data,
    combine_search_data, get_dist, test_rotate_translate, get_rmsd and
    check_rotation. Any other value falls through and does nothing.

    The `random` module is seeded with 0 before dispatch, and run_path is
    created if it does not exist.

    :return: None
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('task', type=str, help='either align or search')
    parser.add_argument('docked_prot_file', type=str,
                        help='file listing proteins to process')
    parser.add_argument(
        'run_path', type=str,
        help='directory where script and output files will be written')
    parser.add_argument('raw_root', type=str,
                        help='directory where raw data will be placed')
    parser.add_argument('--protein', type=str, default='',
                        help='protein name')
    parser.add_argument('--target', type=str, default='',
                        help='target ligand name')
    parser.add_argument('--start', type=str, default='',
                        help='start ligand name')
    parser.add_argument('--align_n', type=int, default=10,
                        help='number of alignments processed in each job')
    parser.add_argument('--rotation_search_step_size', type=int, default=1,
                        help='step size between each angle '
                             'checked, in degrees')
    parser.add_argument('--index', type=int, default=-1,
                        help='grid point group index')
    parser.add_argument(
        '--rmsd_cutoff', type=int, default=2,
        help='rmsd accuracy cutoff between predicted ligand pose '
             'and true ligand pose')
    parser.add_argument('--num_conformers', type=int, default=300,
                        help='maximum number of conformers considered')
    parser.add_argument('--grid_size', type=int, default=6,
                        help='grid size in positive and negative x, y, z '
                             'directions')
    parser.add_argument('--grid_n', type=int, default=30,
                        help='number of grid_points processed in each job')
    parser.add_argument('--time', dest='get_time', action='store_true')
    parser.add_argument('--no_time', dest='get_time', action='store_false')
    parser.set_defaults(get_time=False)
    parser.add_argument('--remove_prot_h', dest='no_prot_h',
                        action='store_true')
    parser.add_argument('--keep_prot_h', dest='no_prot_h',
                        action='store_false')
    parser.set_defaults(no_prot_h=False)
    parser.add_argument('--prot_pocket_only', dest='pocket_only',
                        action='store_true')
    parser.add_argument('--all_prot', dest='pocket_only',
                        action='store_false')
    parser.set_defaults(pocket_only=False)
    args = parser.parse_args()

    # Fixed seed so the shuffles below are reproducible between runs.
    random.seed(0)

    if not os.path.exists(args.run_path):
        os.mkdir(args.run_path)

    # Paths for the single protein / ligand pair given on the command line.
    # Several tasks below iterate over many pairs and rebind these names.
    pair = '{}-to-{}'.format(args.target, args.start)
    protein_path = os.path.join(args.raw_root, args.protein)
    pair_path = os.path.join(protein_path, pair)

    if args.task == 'conformer_all':
        process = get_prots(args.docked_prot_file)
        random.shuffle(process)
        run_conformer_all(process, args.raw_root, args.run_path,
                          args.docked_prot_file)

    elif args.task == 'conformer_group':
        target_lig_file = os.path.join(pair_path, 'ligand_poses',
                                       '{}_lig0.mae'.format(args.target))
        gen_ligand_conformers(target_lig_file, pair_path,
                              args.num_conformers)
        # Remove the log file left behind for this ligand, if any.
        if os.path.exists(
                os.path.join(pair_path, '{}_lig0.log'.format(args.target))):
            os.remove(
                os.path.join(pair_path, '{}_lig0.log'.format(args.target)))

    elif args.task == 'conformer_check':
        process = get_prots(args.docked_prot_file)
        random.shuffle(process)
        run_conformer_check(process, args.raw_root)

    elif args.task == 'align_all':
        process = get_prots(args.docked_prot_file)
        random.shuffle(process)
        run_align_all(process, args.raw_root, args.run_path,
                      args.docked_prot_file, args.align_n)

    elif args.task == 'align_group':
        grouped_files = get_conformer_groups(args.align_n, args.target,
                                             args.start, args.protein,
                                             args.raw_root)
        # Fixed: this previously read args.n, which the parser never
        # defines (AttributeError); --align_n is the group size in use.
        run_align_group(grouped_files, args.index, args.align_n,
                        args.protein, args.target, args.start, args.raw_root)

    elif args.task == 'align_check':
        process = get_prots(args.docked_prot_file)
        random.shuffle(process)
        run_align_check(process, args.raw_root)

    elif args.task == 'align_combine':
        process = get_prots(args.docked_prot_file)
        random.shuffle(process)
        run_align_combine(process, args.raw_root)

    elif args.task == 'run_search':
        process = get_prots(args.docked_prot_file)
        random.shuffle(process)
        grouped_files = get_grid_groups(args.grid_size, args.grid_n)
        search_system_caller(process, args.raw_root, args.run_path,
                             args.docked_prot_file,
                             args.rotation_search_step_size, args.grid_size,
                             grouped_files)

    elif args.task == 'search':
        grouped_files = get_grid_groups(args.grid_size, args.grid_n)
        # NOTE: --index defaults to -1, which selects the last group.
        run_search(args.protein, args.target, args.start, args.index,
                   args.raw_root, args.get_time, args.rmsd_cutoff,
                   args.rotation_search_step_size, grouped_files[args.index],
                   args.no_prot_h, args.pocket_only)

    elif args.task == 'check_search':
        process = get_prots(args.docked_prot_file)
        random.shuffle(process)
        grouped_files = get_grid_groups(args.grid_size, args.grid_n)
        counter = 0
        unfinished = []
        for protein, target, start in process:
            # Only the first 10 pairs with more than one conformer count.
            if counter == 10:
                break
            pair = '{}-to-{}'.format(target, start)
            protein_path = os.path.join(args.raw_root, protein)
            pair_path = os.path.join(protein_path, pair)
            conformer_file = os.path.join(
                pair_path, "{}_lig0-out.maegz".format(target))
            conformers = list(structure.StructureReader(conformer_file))
            if len(conformers) == 1:
                continue
            else:
                counter += 1
            save_folder = os.path.join(
                os.getcwd(), 'decoy_timing_data',
                '{}_{}-to-{}'.format(protein, target, start))
            # One CSV is expected per grid group; record the missing ones.
            for i in range(len(grouped_files)):
                if not os.path.exists(
                        os.path.join(save_folder, '{}.csv'.format(i))):
                    unfinished.append((protein, target, start, i))
        print("Missing:", len(unfinished))
        print(unfinished)

    elif args.task == 'test_search':
        run_test_search(args.protein, args.target, args.start, args.raw_root,
                        args.rmsd_cutoff, args.rotation_search_step_size,
                        pair_path, args.no_prot_h, args.pocket_only,
                        args.get_time)

    elif args.task == 'get_grid_data':
        process = get_prots(args.docked_prot_file)
        random.shuffle(process)
        grouped_files = get_grid_groups(args.grid_size, args.grid_n)
        get_data(process, grouped_files, args.raw_root, args.grid_size)

    elif args.task == 'combine_search_data':
        process = get_prots(args.docked_prot_file)
        random.shuffle(process)
        grouped_files = get_grid_groups(args.grid_size, args.grid_n)
        get_data(process, grouped_files, args.raw_root, args.grid_size, True)

    elif args.task == 'get_dist':
        process = get_prots(args.docked_prot_file)
        random.shuffle(process)
        counter = 0
        for protein, target, start in process:
            # Same selection rule as check_search: first 10 pairs that have
            # more than one conformer.
            if counter == 10:
                break
            pair = '{}-to-{}'.format(target, start)
            protein_path = os.path.join(args.raw_root, protein)
            pair_path = os.path.join(protein_path, pair)
            conformer_file = os.path.join(
                pair_path, "{}_lig0-out.maegz".format(target))
            conformers = list(structure.StructureReader(conformer_file))
            if len(conformers) == 1:
                continue
            else:
                counter += 1
            start_lig_file = os.path.join(pair_path,
                                          '{}_lig.mae'.format(start))
            start_lig = list(structure.StructureReader(start_lig_file))[0]
            start_lig_center = list(get_centroid(start_lig))
            target_lig_file = os.path.join(pair_path, 'ligand_poses',
                                           '{}_lig0.mae'.format(target))
            target_lig = list(structure.StructureReader(target_lig_file))[0]
            target_lig_center = list(get_centroid(target_lig))
            # Euclidean distance between the two ligand centroids.
            dist = math.sqrt(
                ((start_lig_center[0] - target_lig_center[0])**2) +
                ((start_lig_center[1] - target_lig_center[1])**2) +
                ((start_lig_center[2] - target_lig_center[2])**2))
            print(protein, target, start, dist)

    elif args.task == 'test_rotate_translate':
        # Compare this file's translate/rotate helpers against the
        # `transform` module on two fresh copies of the same protein.
        prot_file = os.path.join(pair_path, '{}_prot.mae'.format(args.start))
        schrodinger_prot = list(structure.StructureReader(prot_file))[0]
        custom_prot = list(structure.StructureReader(prot_file))[0]
        translation_vector = np.random.uniform(low=-100, high=100, size=(3))
        transform.translate_structure(schrodinger_prot,
                                      translation_vector[0],
                                      translation_vector[1],
                                      translation_vector[2])
        translate_structure(custom_prot, translation_vector[0],
                            translation_vector[1], translation_vector[2])
        schrodinger_atoms = np.array(schrodinger_prot.getXYZ(copy=False))
        custom_atoms = np.array(custom_prot.getXYZ(copy=False))
        if np.array_equal(schrodinger_atoms, custom_atoms):
            print("Translate function works properly")
        else:
            print("Error in translate function")

        schrodinger_prot = list(structure.StructureReader(prot_file))[0]
        custom_prot = list(structure.StructureReader(prot_file))[0]
        rotation_vector = np.random.uniform(low=-2 * np.pi, high=2 * np.pi,
                                            size=(3))
        rotation_center = np.random.uniform(low=-100, high=100, size=(3))
        rotation_center = [
            rotation_center[0], rotation_center[1], rotation_center[2]
        ]
        transform.rotate_structure(schrodinger_prot, rotation_vector[0],
                                   rotation_vector[1], rotation_vector[2],
                                   rotation_center)
        # Here rotate_structure is given a coordinate array and its return
        # value is written back with setXYZ.
        coords = rotate_structure(custom_prot.getXYZ(copy=False),
                                  rotation_vector[0], rotation_vector[1],
                                  rotation_vector[2], rotation_center)
        custom_prot.setXYZ(coords)
        schrodinger_atoms = np.array(schrodinger_prot.getXYZ(copy=False))
        custom_atoms = np.array(custom_prot.getXYZ(copy=False))
        # Rotation is compared with a tolerance rather than exact equality.
        if np.amax(np.absolute(schrodinger_atoms - custom_atoms)) < 10**-7:
            print("Rotate function works properly")
        else:
            print("Error in rotate function")

    elif args.task == 'get_rmsd':
        # The original applied a no-op .format(args.target) to this
        # placeholder-free name; the resulting string is unchanged.
        conformer_file = os.path.join(
            pair_path, "aligned_to_start_without_hydrogen_conformers.mae")
        conformers = list(structure.StructureReader(conformer_file))

        target_lig_file = os.path.join(pair_path, 'ligand_poses',
                                       '{}_lig0.mae'.format(args.target))
        target_lig = list(structure.StructureReader(target_lig_file))[0]
        build.delete_hydrogens(target_lig)
        start_lig_file = os.path.join(pair_path,
                                      '{}_lig.mae'.format(args.start))
        start_lig = list(structure.StructureReader(start_lig_file))[0]
        start_lig_center = list(get_centroid(start_lig))

        rmsds = []
        for i, conformer in tqdm(enumerate(conformers),
                                 desc='going through conformers'):
            # Move each conformer's centroid onto the start ligand's
            # centroid before measuring its RMSD to the target pose.
            conformer_center = list(get_centroid(conformer))
            translate_structure(conformer,
                                start_lig_center[0] - conformer_center[0],
                                start_lig_center[1] - conformer_center[1],
                                start_lig_center[2] - conformer_center[2])
            rmsds.append(
                (conformer,
                 rmsd.calculate_in_place_rmsd(conformer,
                                              conformer.getAtomIndices(),
                                              target_lig,
                                              target_lig.getAtomIndices()),
                 i))

        # best_match_conformer = min(rmsds, key=lambda x: x[1])
        # print(best_match_conformer[1], best_match_conformer[2])
        # file = os.path.join(pair_path, 'best_match_conformer.mae')
        # with structure.StructureWriter(file) as best_match:
        #     best_match.append(best_match_conformer[0])

        # NOTE(review): index 248 is hard-coded and raises IndexError when
        # fewer than 249 conformers were read -- looks like leftover
        # debugging; confirm whether the best-match code above is intended.
        print(rmsds[248][1], rmsds[248][2])
        file = os.path.join(pair_path, 'translated_conformer_248.mae')
        with structure.StructureWriter(file) as best_match:
            best_match.append(rmsds[248][0])

    elif args.task == 'check_rotation':
        # Compare one combined (5, 5, 5) degree rotation with three
        # successive single-axis 5 degree rotations on a one-atom structure.
        target_lig_file = os.path.join(pair_path, 'ligand_poses',
                                       '{}_lig0.mae'.format(args.target))
        target_lig = list(structure.StructureReader(target_lig_file))[0]
        # Keep only the atom with index 1.
        remove = [i for i in target_lig.getAtomIndices() if i != 1]
        target_lig.deleteAtoms(remove)
        center = list(get_centroid(target_lig))
        print("ROTATE 5,5,5")
        # NOTE(review): rotate_structure receives a structure here but a
        # coordinate array in test_rotate_translate -- confirm which one the
        # helper expects.
        rotate_structure(target_lig, math.radians(5), math.radians(5),
                         math.radians(5), center)

        target_lig_2 = list(structure.StructureReader(target_lig_file))[0]
        target_lig_2.deleteAtoms(remove)
        center = list(get_centroid(target_lig_2))
        print("ROTATE 5,0,0")
        rotate_structure(target_lig_2, math.radians(5), 0, 0, center)
        print("ROTATE 0,5,0")
        rotate_structure(target_lig_2, 0, math.radians(5), 0, center)
        print("ROTATE 0,0,5")
        rotate_structure(target_lig_2, 0, 0, math.radians(5), center)

        print(
            rmsd.calculate_in_place_rmsd(target_lig,
                                         target_lig.getAtomIndices(),
                                         target_lig_2,
                                         target_lig_2.getAtomIndices()))
        print(target_lig.getXYZ(copy=False))
        print(target_lig_2.getXYZ(copy=False))
def time_conformer_decoys(pair_path, start_lig_center, target_lig, prot,
                          rotation_search_step_size):
    """
    Print average wall-clock timings for the pose-manipulation primitives.

    Three passes are made over the conformers read from
    "aligned_to_start_without_hydrogen_conformers.mae" in pair_path (the
    file is re-read before each pass):

    1. translate + rotate with transform.translate_structure /
       transform.rotate_structure (reported as "schrodinger"),
    2. translate + rotate with this file's translate_structure /
       rotate_structure (reported as "custom"),
    3. a sweep over rotations during which steric_clash.clash_iterator,
       steric_clash.clash_volume and rmsd.calculate_in_place_rmsd are
       timed; the function prints the three averages and returns as soon as
       1000 samples have been collected.

    NOTE(review): the nesting of the three rotation loops in pass 3 was
    reconstructed from whitespace-collapsed source -- confirm against the
    upstream file.

    :param pair_path: (string) directory containing the conformer file
    :param start_lig_center: indexable with at least three coordinates; the
        conformer centroids are translated onto it
    :param target_lig: structure the RMSD is computed against
    :param prot: structure the clash measures are computed against
    :param rotation_search_step_size: rotation step in degrees; also used as
        the step of range(), so it must be a non-zero integer
    :return: None. If fewer than 1000 samples are collected in pass 3, the
        function ends without printing the pass-3 averages.
    """
    # ---- pass 1: transform-module translate / rotate ----
    translate_times = []
    rotate_times = []
    conformer_file = os.path.join(
        pair_path, "aligned_to_start_without_hydrogen_conformers.mae")
    conformers = list(structure.StructureReader(conformer_file))
    for conformer in conformers:
        conformer_center = list(get_centroid(conformer))

        # translation
        grid_loc = [0, 0, 0]
        start = time.time()
        transform.translate_structure(
            conformer,
            start_lig_center[0] - conformer_center[0] + grid_loc[0],
            start_lig_center[1] - conformer_center[1] + grid_loc[1],
            start_lig_center[2] - conformer_center[2] + grid_loc[2])
        end = time.time()
        translate_times.append(end - start)

        # rotation
        # The centroid is not recomputed after the translation here; only
        # the elapsed time of the call is recorded.
        start = time.time()
        transform.rotate_structure(
            conformer, math.radians(-30 - rotation_search_step_size), 0, 0,
            conformer_center)
        end = time.time()
        rotate_times.append(end - start)

    # statistics.mean raises StatisticsError if no conformers were read.
    print("Average schrodinger translate time =",
          statistics.mean(translate_times))
    print("Average schrodinger rotate time =", statistics.mean(rotate_times))

    # ---- pass 2: this file's translate / rotate ----
    translate_times = []
    rotate_times = []
    conformer_file = os.path.join(
        pair_path, "aligned_to_start_without_hydrogen_conformers.mae")
    conformers = list(structure.StructureReader(conformer_file))
    for conformer in conformers:
        conformer_center = list(get_centroid(conformer))

        # translation
        grid_loc = [0, 0, 0]
        start = time.time()
        translate_structure(
            conformer,
            start_lig_center[0] - conformer_center[0] + grid_loc[0],
            start_lig_center[1] - conformer_center[1] + grid_loc[1],
            start_lig_center[2] - conformer_center[2] + grid_loc[2])
        end = time.time()
        translate_times.append(end - start)

        # rotation
        start = time.time()
        rotate_structure(conformer,
                         math.radians(-30 - rotation_search_step_size), 0, 0,
                         conformer_center)
        end = time.time()
        rotate_times.append(end - start)

    print("Average custom translate time =", statistics.mean(translate_times))
    print("Average custom rotate time =", statistics.mean(rotate_times))

    # ---- pass 3: clash / RMSD evaluation during a rotation sweep ----
    clash_iterator_times = []
    clash_volume_times = []
    rmsd_times = []
    rotation_search_step_size_rad = math.radians(rotation_search_step_size)
    conformer_file = os.path.join(
        pair_path, "aligned_to_start_without_hydrogen_conformers.mae")
    conformers = list(structure.StructureReader(conformer_file))
    for conformer in conformers:
        conformer_center = list(get_centroid(conformer))

        # translation
        grid_loc = [0, 0, 0]
        translate_structure(
            conformer,
            start_lig_center[0] - conformer_center[0] + grid_loc[0],
            start_lig_center[1] - conformer_center[1] + grid_loc[1],
            start_lig_center[2] - conformer_center[2] + grid_loc[2])
        conformer_center = list(get_centroid(conformer))

        # keep track of rotation angles
        # Start one step below -30 degrees about x so that the first step of
        # the x loop lands on -30.
        rotate_structure(conformer,
                         math.radians(-30 - rotation_search_step_size), 0, 0,
                         conformer_center)
        x_so_far = -30 - rotation_search_step_size
        y_so_far = 0
        z_so_far = 0

        for _ in range(-30, 30, rotation_search_step_size):
            # x rotation
            # Advance x by one step and, in the same call, rewind y to one
            # step below -30 relative to the tracked y_so_far.
            rotate_structure(
                conformer, rotation_search_step_size_rad,
                math.radians(-30 - rotation_search_step_size - y_so_far), 0,
                conformer_center)
            # NOTE(review): the *_so_far counters advance by 1 per step, not
            # by rotation_search_step_size; they only match the applied
            # angles when the step size is 1 -- confirm intent.
            x_so_far += 1
            y_so_far += -30 - rotation_search_step_size - y_so_far

            for _ in range(-30, 30, rotation_search_step_size):
                # y rotation
                # Advance y by one step and rewind z likewise.
                rotate_structure(
                    conformer, 0, rotation_search_step_size_rad,
                    math.radians(-30 - rotation_search_step_size - z_so_far),
                    conformer_center)
                y_so_far += 1
                z_so_far += -30 - rotation_search_step_size - z_so_far

                for _ in range(-30, 30, rotation_search_step_size):
                    # z rotation
                    rotate_structure(conformer, 0, 0,
                                     rotation_search_step_size_rad,
                                     conformer_center)
                    z_so_far += 1

                    # get clash_iterator
                    # Timed work: materialise the iterator and take the
                    # largest third field; the result itself is discarded.
                    start = time.time()
                    max([
                        x[2] for x in list(
                            steric_clash.clash_iterator(prot, struc2=conformer))
                    ])
                    end = time.time()
                    clash_iterator_times.append(end - start)

                    # get clash_volume
                    start = time.time()
                    steric_clash.clash_volume(prot, struc2=conformer)
                    end = time.time()
                    clash_volume_times.append(end - start)

                    # get rmsd
                    start = time.time()
                    rmsd.calculate_in_place_rmsd(conformer,
                                                 conformer.getAtomIndices(),
                                                 target_lig,
                                                 target_lig.getAtomIndices())
                    end = time.time()
                    rmsd_times.append(end - start)

                    # Stop after 1000 samples; this is the only place the
                    # pass-3 averages are reported.
                    if len(clash_iterator_times) == 1000:
                        print("Average clash iterator time =",
                              statistics.mean(clash_iterator_times))
                        print("Average clash volume time =",
                              statistics.mean(clash_volume_times))
                        print("Average rmsd time =",
                              statistics.mean(rmsd_times))
                        return
def _write_search_summary(output_file, save_file, num_total_poses,
                          num_valid_poses, found):
    """Append the search counts to output_file and, if it does not exist yet,
    write the same lines to save_file."""
    lines = [
        "Num poses searched = {}\n".format(num_total_poses),
        "Num acceptable clash poses searched = {}\n".format(num_valid_poses),
    ]
    if not found:
        lines.append("No correct poses found\n")

    with open(output_file, "a") as f:
        f.writelines(lines)
    # The first job to finish owns the shared save file; later ones leave it.
    if not os.path.exists(save_file):
        with open(save_file, 'w') as f:
            f.writelines(lines)


def create_conformer_decoys(save_path, run_path, conformers, grid,
                            num_jobs_submitted, start_lig_center, target_lig,
                            prot, min_angle, max_angle, rmsd_cutoff, protein,
                            target, start, index):
    """
    Randomly sample conformer poses until one matches the target pose.

    Each iteration picks a random conformer and a random grid cell, moves
    the conformer's centroid to start_lig_center plus the cell offset, and
    rotates it about its new centroid. While more than one grid cell
    remains the angles are drawn uniformly from [min_angle, max_angle);
    once a single cell is left they are picked from a whole-degree list
    covering the same interval. A pose counts as acceptable when its clash
    volume against prot is below 200. The search stops when

    * an acceptable pose has RMSD to target_lig below rmsd_cutoff,
    * grid, conformers and rotations have each been pruned down to a single
      entry and five more consecutive misses occur, or
    * at a multiple of 1000 poses, get_jobs_in_queue reports a count that
      differs from num_jobs_submitted.

    After five consecutive misses the entry with the highest miss count is
    pruned: first from grid, then (once one cell is left) alternately from
    the conformer list and the rotation list.

    :param save_path: (string) directory of the shared result file
        '<protein>_<target>_<start>.txt'
    :param run_path: (string) directory of the per-job log
        '<protein>_<target>_<start>_<index>.txt'
    :param conformers: iterable of structures to sample (mutated in place)
    :param grid: non-empty list of [offset, miss_count] entries; mutated
    :param num_jobs_submitted: job count compared with get_jobs_in_queue
    :param start_lig_center: indexable with at least three coordinates
    :param target_lig: structure the RMSD is computed against
    :param prot: structure the clash volume is computed against
    :param min_angle: lower rotation bound, in radians (math.degrees is
        applied to it to build the whole-degree list)
    :param max_angle: upper rotation bound, in radians
    :param rmsd_cutoff: RMSD below which a pose counts as correct
    :param protein: (string) protein name
    :param target: (string) target ligand name
    :param start: (string) start ligand name
    :param index: job index, used only in the log file name
    :return: None
    :raises ValueError: if conformers or grid is empty
    """
    conformer_ls = [[c, 0] for c in conformers]
    if not conformer_ls or not grid:
        # Previously this surfaced as an opaque ValueError from
        # random.randint(0, -1) inside the loop.
        raise ValueError("conformers and grid must both be non-empty")

    # Whole-degree rotation triples, each paired with a miss counter.
    min_deg = int(math.degrees(min_angle))
    max_deg = int(math.degrees(max_angle))
    rot_ls = []
    for rot_x in range(min_deg, max_deg + 1):
        for rot_y in range(min_deg, max_deg + 1):
            for rot_z in range(min_deg, max_deg + 1):
                rot_ls.append([[
                    math.radians(rot_x),
                    math.radians(rot_y),
                    math.radians(rot_z)
                ], 0])

    output_file = os.path.join(
        run_path, '{}_{}_{}_{}.txt'.format(protein, target, start, index))
    save_file = os.path.join(save_path,
                             '{}_{}_{}.txt'.format(protein, target, start))

    num_iter_without_pose = 0
    num_valid_poses = 0
    num_total_poses = 0

    while True:
        num_iter_without_pose += 1
        num_total_poses += 1

        # Periodic progress line plus a check of the job queue.
        if num_total_poses % 1000 == 0:
            num_jobs_in_queue = get_jobs_in_queue('{}{}{}'.format(
                protein[0], target[0], start[0]))
            with open(output_file, "a") as f:
                f.write(
                    "num_total_poses: {}, len(grid): {}, len(conformer_ls): {}, len(rot_ls): {}, num_jobs_in_queue: "
                    "{}\n".format(num_total_poses, len(grid),
                                  len(conformer_ls), len(rot_ls),
                                  num_jobs_in_queue))
            if num_jobs_in_queue != num_jobs_submitted:
                break

        conformer_index = random.randint(0, len(conformer_ls) - 1)
        conformer = conformer_ls[conformer_index][0]
        conformer_center = list(get_centroid(conformer))

        # translation
        # (named grid_index so the `index` parameter is not overwritten)
        grid_index = random.randint(0, len(grid) - 1)
        grid_loc = grid[grid_index][0]
        transform.translate_structure(
            conformer,
            start_lig_center[0] - conformer_center[0] + grid_loc[0],
            start_lig_center[1] - conformer_center[1] + grid_loc[1],
            start_lig_center[2] - conformer_center[2] + grid_loc[2])
        conformer_center = list(get_centroid(conformer))

        # rotation
        if len(grid) > 1:
            x_angle = np.random.uniform(min_angle, max_angle)
            y_angle = np.random.uniform(min_angle, max_angle)
            z_angle = np.random.uniform(min_angle, max_angle)
        else:
            rot_index = random.randint(0, len(rot_ls) - 1)
            x_angle, y_angle, z_angle = rot_ls[rot_index][0]
        transform.rotate_structure(conformer, x_angle, y_angle, z_angle,
                                   conformer_center)

        if steric_clash.clash_volume(prot, struc2=conformer) < 200:
            num_valid_poses += 1
            if rmsd.calculate_in_place_rmsd(
                    conformer, conformer.getAtomIndices(), target_lig,
                    target_lig.getAtomIndices()) < rmsd_cutoff:
                _write_search_summary(output_file, save_file,
                                      num_total_poses, num_valid_poses,
                                      found=True)
                break
            grid[grid_index][1] = 0
            num_iter_without_pose = 0
        elif num_iter_without_pose == 5 and len(grid) > 1:
            # Prune the grid cell that has failed most often.
            max_val = max(grid, key=lambda x: x[1])
            grid.remove(max_val)
            num_iter_without_pose = 0
        elif num_iter_without_pose == 5 and len(grid) == 1:
            if len(conformer_ls) == 1 and len(rot_ls) == 1:
                # Nothing left to prune: give up.
                _write_search_summary(output_file, save_file,
                                      num_total_poses, num_valid_poses,
                                      found=False)
                break
            elif len(conformer_ls) > 1 and (
                    len(rot_ls) == 1 or
                    (len(conformer_ls) + len(rot_ls)) % 2 == 0):
                # The parity test alternates pruning between the two lists
                # while both still have more than one entry.
                max_val = max(conformer_ls, key=lambda x: x[1])
                conformer_ls.remove(max_val)
            else:
                max_val = max(rot_ls, key=lambda x: x[1])
                rot_ls.remove(max_val)
            num_iter_without_pose = 0
        else:
            # Plain miss: charge it to everything that was sampled.
            grid[grid_index][1] += 1
            conformer_ls[conformer_index][1] += 1
            if len(grid) == 1:
                # rot_index was set above in this same iteration, because
                # the grid size cannot change before reaching this branch.
                rot_ls[rot_index][1] += 1