def rotamers(rot_s, rot_r, s, r, cutoff):
    """Summarise how far a residue's library rotamers sit from a reference pose.

    Each rotamer of the library built for ``rot_r`` is applied in turn. For
    every rotamer the in-place RMSD between the atoms of ``r`` in ``s`` and
    the atoms of ``rot_r`` in ``rot_s`` is recorded; rotamers whose clash
    volume against the rest of ``rot_s`` is below ``cutoff`` are additionally
    collected as the "restricted" set.

    Args:
        rot_s: Structure that contains ``rot_r``; rotamers are applied to it.
        rot_r: Residue of ``rot_s`` the rotamer library is built for.
        s: Reference structure.
        r: Reference residue in ``s`` (its ``pdbres``/``resnum`` are read).
        cutoff: Exclusive upper bound on the clash volume for a rotamer to
            count towards the restricted statistics.

    Returns:
        ``(num_rots, avg_rot_rmsd, num_r_rots, avg_r_rot_rmsd)``: the number
        of library rotamers and their average RMSD, followed by the same two
        values for the restricted set.  All four are ``0`` if anything in
        the computation raises.
    """
    try:
        # NOTE(review): this function is itself named ``rotamers``; if the
        # rotamer library module is bound to the same module-level name, this
        # lookup resolves to the function and always falls into the handler
        # below.  Renaming would change the public interface, so it is only
        # flagged here -- confirm how the module is imported.
        rotamer_lib = rotamers.Rotamers(rot_s, list(rot_r.atom)[0])
        a_ls = r.getAtomList()
        r_rmsd_ls = []
        rmsd_ls = []
        for k, rotamer in enumerate(list(rotamer_lib.rotamers)):
            rotamer.apply()
            rot_a_ls = rot_r.getAtomList()
            # Set membership keeps the partner-list construction linear in
            # the number of atoms instead of quadratic.
            rot_a_set = set(rot_a_ls)
            # Clash partners: every atom outside the rotated residue, except
            # chain 'L' (presumably the ligand chain -- TODO confirm).
            no_r_a_ls = [
                a.index for a in rot_s.atom
                if a.index not in rot_a_set and a.chain != 'L'
            ]
            clash = steric_clash.clash_volume(rot_s, rot_a_ls, rot_s,
                                              no_r_a_ls)
            # Debug trace for one specific residue, kept from the original.
            if 'LEU' in r.pdbres and r.resnum == 167:
                print(k, clash)
            # Compute the RMSD once and reuse it for both statistics.
            rot_rmsd = rmsd.calculate_in_place_rmsd(s, a_ls, rot_s, rot_a_ls)
            if clash < cutoff:
                r_rmsd_ls.append(rot_rmsd)
            rmsd_ls.append(rot_rmsd)
        num_rots = len(rotamer_lib.rotamers)
        avg_rot_rmsd = safeAvg(num_rots, rmsd_ls)
        num_r_rots = len(r_rmsd_ls)
        avg_r_rot_rmsd = safeAvg(num_r_rots, r_rmsd_ls)
    except Exception as e:
        # Deliberate best-effort: ALA, GLY and PRO are expected to fail here
        # and are silenced; any other residue reports the error.
        if ('ALA' not in r.pdbres and 'GLY' not in r.pdbres
                and 'PRO' not in r.pdbres):
            print(e)
        num_rots = 0
        avg_rot_rmsd = 0
        num_r_rots = 0
        avg_r_rot_rmsd = 0
    return (num_rots, avg_rot_rmsd, num_r_rots, avg_r_rot_rmsd)
def run_test_search(protein, target, start, raw_root, cutoff,
                    rotation_search_step_size, pair_path, no_prot_h,
                    pocket_only, get_time):
    """Check the search routine against a manually transformed conformer.

    A random rotation (one of the first five angles of the search range) and
    a random single grid offset in ``[-6, 6]`` per axis are drawn.
    ``run_search`` is invoked in test mode with those values, and the
    conformer it returns is compared with the first conformer of the
    conformer file after the same translation and rotation have been applied
    by hand.  The outcome is printed; nothing is returned.
    """
    step = rotation_search_step_size
    candidate_angles = list(range(-30, 30 + step, step))[:5]
    # Draw order (x, y, z, then the three grid offsets) is kept so that a
    # fixed random seed yields the same test case.
    x_rot = random.choice(candidate_angles)
    y_rot = random.choice(candidate_angles)
    z_rot = random.choice(candidate_angles)

    offsets = list(range(-6, 7))
    grid = [[random.choice(offsets) for _ in range(3)]]
    shift_x, shift_y, shift_z = grid[0]

    conformer = run_search(protein, target, start, 0, raw_root, get_time,
                           cutoff, rotation_search_step_size, grid,
                           no_prot_h, pocket_only, True, x_rot, y_rot, z_rot)

    # Rebuild the expected pose independently from the first stored conformer.
    conformer_file = os.path.join(
        pair_path, "aligned_to_start_without_hydrogen_conformers.mae")
    reference = list(structure.StructureReader(conformer_file))[0]
    translate_structure(reference, shift_x, shift_y, shift_z)
    reference_center = list(get_centroid(reference))
    rotated = rotate_structure(reference.getXYZ(copy=False),
                               math.radians(x_rot), math.radians(y_rot),
                               math.radians(z_rot), reference_center)
    reference.setXYZ(rotated)

    rmsd_val = rmsd.calculate_in_place_rmsd(conformer,
                                            conformer.getAtomIndices(),
                                            reference,
                                            reference.getAtomIndices())
    if abs(rmsd_val) != 0:
        print("x_rot =", x_rot, "y_rot =", y_rot, "z_rot =", z_rot)
        print("RMSD =", rmsd_val, "but RMSD should equal 0")
    else:
        print("Search works properly", rmsd_val)
def main():
    """Parse the command line and run the task named by ``task``.

    Recognised tasks: ``conformer_all``, ``conformer_group``,
    ``conformer_check``, ``align_all``, ``align_group``, ``align_check``,
    ``align_combine``, ``run_search``, ``search``, ``check_search``,
    ``test_search``, ``get_grid_data``, ``combine_search_data``,
    ``get_dist``, ``test_rotate_translate``, ``get_rmsd`` and
    ``check_rotation``.  Any other value does nothing.

    ``random`` is seeded with 0 before dispatch, so every shuffle of the
    protein list is reproducible between runs.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('task', type=str, help='either align or search')
    parser.add_argument('docked_prot_file', type=str,
                        help='file listing proteins to process')
    parser.add_argument(
        'run_path', type=str,
        help='directory where script and output files will be written')
    parser.add_argument('raw_root', type=str,
                        help='directory where raw data will be placed')
    parser.add_argument('--protein', type=str, default='',
                        help='protein name')
    parser.add_argument('--target', type=str, default='',
                        help='target ligand name')
    parser.add_argument('--start', type=str, default='',
                        help='start ligand name')
    parser.add_argument('--align_n', type=int, default=10,
                        help='number of alignments processed in each job')
    parser.add_argument('--rotation_search_step_size', type=int, default=1,
                        help='step size between each angle '
                        'checked, in degrees')
    parser.add_argument('--index', type=int, default=-1,
                        help='grid point group index')
    parser.add_argument(
        '--rmsd_cutoff', type=int, default=2,
        help='rmsd accuracy cutoff between predicted ligand pose '
        'and true ligand pose')
    parser.add_argument('--num_conformers', type=int, default=300,
                        help='maximum number of conformers considered')
    parser.add_argument('--grid_size', type=int, default=6,
                        help='grid size in positive and negative x, y, z '
                        'directions')
    parser.add_argument('--grid_n', type=int, default=30,
                        help='number of grid_points processed in each job')
    parser.add_argument('--time', dest='get_time', action='store_true')
    parser.add_argument('--no_time', dest='get_time', action='store_false')
    parser.set_defaults(get_time=False)
    parser.add_argument('--remove_prot_h', dest='no_prot_h',
                        action='store_true')
    parser.add_argument('--keep_prot_h', dest='no_prot_h',
                        action='store_false')
    parser.set_defaults(no_prot_h=False)
    parser.add_argument('--prot_pocket_only', dest='pocket_only',
                        action='store_true')
    parser.add_argument('--all_prot', dest='pocket_only',
                        action='store_false')
    parser.set_defaults(pocket_only=False)
    args = parser.parse_args()

    random.seed(0)
    # exist_ok avoids the check-then-create race between parallel jobs.
    os.makedirs(args.run_path, exist_ok=True)

    pair = '{}-to-{}'.format(args.target, args.start)
    protein_path = os.path.join(args.raw_root, args.protein)
    pair_path = os.path.join(protein_path, pair)

    def load_shuffled_prots():
        """Read the protein list and shuffle it (reproducible via the seed)."""
        prots = get_prots(args.docked_prot_file)
        random.shuffle(prots)
        return prots

    task = args.task

    if task == 'conformer_all':
        process = load_shuffled_prots()
        run_conformer_all(process, args.raw_root, args.run_path,
                          args.docked_prot_file)

    elif task == 'conformer_group':
        target_lig_file = os.path.join(pair_path, 'ligand_poses',
                                       '{}_lig0.mae'.format(args.target))
        gen_ligand_conformers(target_lig_file, pair_path,
                              args.num_conformers)
        # Remove the log file left next to the conformers, if there is one.
        log_file = os.path.join(pair_path,
                                '{}_lig0.log'.format(args.target))
        if os.path.exists(log_file):
            os.remove(log_file)

    elif task == 'conformer_check':
        process = load_shuffled_prots()
        run_conformer_check(process, args.raw_root)

    elif task == 'align_all':
        process = load_shuffled_prots()
        run_align_all(process, args.raw_root, args.run_path,
                      args.docked_prot_file, args.align_n)

    elif task == 'align_group':
        grouped_files = get_conformer_groups(args.align_n, args.target,
                                             args.start, args.protein,
                                             args.raw_root)
        # The parser defines --align_n; the previous ``args.n`` does not
        # exist and raised AttributeError.
        run_align_group(grouped_files, args.index, args.align_n,
                        args.protein, args.target, args.start, args.raw_root)

    elif task == 'align_check':
        process = load_shuffled_prots()
        run_align_check(process, args.raw_root)

    elif task == 'align_combine':
        process = load_shuffled_prots()
        run_align_combine(process, args.raw_root)

    elif task == 'run_search':
        process = load_shuffled_prots()
        grouped_files = get_grid_groups(args.grid_size, args.grid_n)
        search_system_caller(process, args.raw_root, args.run_path,
                             args.docked_prot_file,
                             args.rotation_search_step_size, args.grid_size,
                             grouped_files)

    elif task == 'search':
        grouped_files = get_grid_groups(args.grid_size, args.grid_n)
        run_search(args.protein, args.target, args.start, args.index,
                   args.raw_root, args.get_time, args.rmsd_cutoff,
                   args.rotation_search_step_size,
                   grouped_files[args.index], args.no_prot_h,
                   args.pocket_only)

    elif task == 'check_search':
        process = load_shuffled_prots()
        grouped_files = get_grid_groups(args.grid_size, args.grid_n)
        counter = 0
        unfinished = []
        for protein, target, start in process:
            # Only the first ten pairs with more than one conformer count.
            if counter == 10:
                break
            pair = '{}-to-{}'.format(target, start)
            protein_path = os.path.join(args.raw_root, protein)
            pair_path = os.path.join(protein_path, pair)
            conformer_file = os.path.join(
                pair_path, "{}_lig0-out.maegz".format(target))
            conformers = list(structure.StructureReader(conformer_file))
            if len(conformers) == 1:
                continue
            counter += 1
            save_folder = os.path.join(
                os.getcwd(), 'decoy_timing_data',
                '{}_{}-to-{}'.format(protein, target, start))
            for i in range(len(grouped_files)):
                if not os.path.exists(
                        os.path.join(save_folder, '{}.csv'.format(i))):
                    unfinished.append((protein, target, start, i))
        print("Missing:", len(unfinished))
        print(unfinished)

    elif task == 'test_search':
        run_test_search(args.protein, args.target, args.start,
                        args.raw_root, args.rmsd_cutoff,
                        args.rotation_search_step_size, pair_path,
                        args.no_prot_h, args.pocket_only, args.get_time)

    elif task == 'get_grid_data':
        process = load_shuffled_prots()
        grouped_files = get_grid_groups(args.grid_size, args.grid_n)
        get_data(process, grouped_files, args.raw_root, args.grid_size)

    elif task == 'combine_search_data':
        process = load_shuffled_prots()
        grouped_files = get_grid_groups(args.grid_size, args.grid_n)
        get_data(process, grouped_files, args.raw_root, args.grid_size, True)

    elif task == 'get_dist':
        process = load_shuffled_prots()
        counter = 0
        for protein, target, start in process:
            if counter == 10:
                break
            pair = '{}-to-{}'.format(target, start)
            protein_path = os.path.join(args.raw_root, protein)
            pair_path = os.path.join(protein_path, pair)
            conformer_file = os.path.join(
                pair_path, "{}_lig0-out.maegz".format(target))
            conformers = list(structure.StructureReader(conformer_file))
            if len(conformers) == 1:
                continue
            counter += 1
            start_lig_file = os.path.join(pair_path,
                                          '{}_lig.mae'.format(start))
            start_lig = list(structure.StructureReader(start_lig_file))[0]
            start_lig_center = list(get_centroid(start_lig))
            target_lig_file = os.path.join(pair_path, 'ligand_poses',
                                           '{}_lig0.mae'.format(target))
            target_lig = list(structure.StructureReader(target_lig_file))[0]
            target_lig_center = list(get_centroid(target_lig))
            # Euclidean distance between the two ligand centroids.
            dist = math.sqrt(
                ((start_lig_center[0] - target_lig_center[0])**2) +
                ((start_lig_center[1] - target_lig_center[1])**2) +
                ((start_lig_center[2] - target_lig_center[2])**2))
            print(protein, target, start, dist)

    elif task == 'test_rotate_translate':
        prot_file = os.path.join(pair_path, '{}_prot.mae'.format(args.start))

        # Translation: the library and the custom routine must agree exactly.
        schrodinger_prot = list(structure.StructureReader(prot_file))[0]
        custom_prot = list(structure.StructureReader(prot_file))[0]
        translation_vector = np.random.uniform(low=-100, high=100, size=(3))
        transform.translate_structure(schrodinger_prot,
                                      translation_vector[0],
                                      translation_vector[1],
                                      translation_vector[2])
        translate_structure(custom_prot, translation_vector[0],
                            translation_vector[1], translation_vector[2])
        schrodinger_atoms = np.array(schrodinger_prot.getXYZ(copy=False))
        custom_atoms = np.array(custom_prot.getXYZ(copy=False))
        if np.array_equal(schrodinger_atoms, custom_atoms):
            print("Translate function works properly")
        else:
            print("Error in translate function")

        # Rotation: agreement is checked to an absolute tolerance of 1e-7.
        schrodinger_prot = list(structure.StructureReader(prot_file))[0]
        custom_prot = list(structure.StructureReader(prot_file))[0]
        rotation_vector = np.random.uniform(low=-2 * np.pi, high=2 * np.pi,
                                            size=(3))
        rotation_center = np.random.uniform(low=-100, high=100, size=(3))
        rotation_center = [
            rotation_center[0], rotation_center[1], rotation_center[2]
        ]
        transform.rotate_structure(schrodinger_prot, rotation_vector[0],
                                   rotation_vector[1], rotation_vector[2],
                                   rotation_center)
        coords = rotate_structure(custom_prot.getXYZ(copy=False),
                                  rotation_vector[0], rotation_vector[1],
                                  rotation_vector[2], rotation_center)
        custom_prot.setXYZ(coords)
        schrodinger_atoms = np.array(schrodinger_prot.getXYZ(copy=False))
        custom_atoms = np.array(custom_prot.getXYZ(copy=False))
        if np.amax(np.absolute(schrodinger_atoms - custom_atoms)) < 10**-7:
            print("Rotate function works properly")
        else:
            print("Error in rotate function")

    elif task == 'get_rmsd':
        # The file name has no placeholder, so the former ``.format()`` call
        # on it was a no-op and has been dropped.
        conformer_file = os.path.join(
            pair_path, "aligned_to_start_without_hydrogen_conformers.mae")
        conformers = list(structure.StructureReader(conformer_file))
        target_lig_file = os.path.join(pair_path, 'ligand_poses',
                                       '{}_lig0.mae'.format(args.target))
        target_lig = list(structure.StructureReader(target_lig_file))[0]
        build.delete_hydrogens(target_lig)
        start_lig_file = os.path.join(pair_path,
                                      '{}_lig.mae'.format(args.start))
        start_lig = list(structure.StructureReader(start_lig_file))[0]
        start_lig_center = list(get_centroid(start_lig))
        rmsds = []
        for i, conformer in tqdm(enumerate(conformers),
                                 desc='going through conformers'):
            # Move each conformer's centroid onto the start ligand's.
            conformer_center = list(get_centroid(conformer))
            translate_structure(conformer,
                                start_lig_center[0] - conformer_center[0],
                                start_lig_center[1] - conformer_center[1],
                                start_lig_center[2] - conformer_center[2])
            rmsds.append(
                (conformer,
                 rmsd.calculate_in_place_rmsd(conformer,
                                              conformer.getAtomIndices(),
                                              target_lig,
                                              target_lig.getAtomIndices()),
                 i))
        # NOTE(review): conformer 248 is hard-coded; this raises IndexError
        # when the file holds fewer than 249 conformers.
        print(rmsds[248][1], rmsds[248][2])
        file = os.path.join(pair_path, 'translated_conformer_248.mae')
        with structure.StructureWriter(file) as best_match:
            best_match.append(rmsds[248][0])

    elif task == 'check_rotation':
        target_lig_file = os.path.join(pair_path, 'ligand_poses',
                                       '{}_lig0.mae'.format(args.target))
        target_lig = list(structure.StructureReader(target_lig_file))[0]
        # Keep only atom 1 so the effect of each rotation is easy to read.
        remove = [i for i in target_lig.getAtomIndices() if i != 1]
        target_lig.deleteAtoms(remove)
        center = list(get_centroid(target_lig))
        # NOTE(review): here rotate_structure receives a structure and its
        # return value is ignored, while other call sites pass coordinates
        # and use the returned array -- confirm which form is current.
        print("ROTATE 5,5,5")
        rotate_structure(target_lig, math.radians(5), math.radians(5),
                         math.radians(5), center)
        target_lig_2 = list(structure.StructureReader(target_lig_file))[0]
        target_lig_2.deleteAtoms(remove)
        center = list(get_centroid(target_lig_2))
        print("ROTATE 5,0,0")
        rotate_structure(target_lig_2, math.radians(5), 0, 0, center)
        print("ROTATE 0,5,0")
        rotate_structure(target_lig_2, 0, math.radians(5), 0, center)
        print("ROTATE 0,0,5")
        rotate_structure(target_lig_2, 0, 0, math.radians(5), center)
        print(
            rmsd.calculate_in_place_rmsd(target_lig,
                                         target_lig.getAtomIndices(),
                                         target_lig_2,
                                         target_lig_2.getAtomIndices()))
        print(target_lig.getXYZ(copy=False))
        print(target_lig_2.getXYZ(copy=False))
def create_conformer_decoys(grid, target_lig, cutoff,
                            rotation_search_step_size, protein, target,
                            start, index, pair_path, test, x_rot, y_rot,
                            z_rot):
    """Exhaustively rotate every conformer at each grid offset and time it.

    For each grid location the conformer file is re-read, every conformer is
    translated by the grid offset and then rotated through all (x, y, z)
    angle combinations from -30 to +30 degrees in steps of
    ``rotation_search_step_size``.  Each pose is compared with
    ``target_lig`` by in-place RMSD.  One row per grid location is written
    to ``decoy_timing_data/<protein>_<target>-to-<start>/<index>.csv``
    below the current working directory.

    Args:
        grid: Iterable of ``[dx, dy, dz]`` translation offsets.
        target_lig: Structure each pose is compared with.
        cutoff: Poses with RMSD strictly below this count as correct.
        rotation_search_step_size: Angle step in degrees.
        protein, target, start: Names recorded in the CSV and used for the
            output folder.
        index: Name (without extension) of the CSV file.
        pair_path: Directory holding the conformer file.
        test: If true, return the conformer as soon as the rotation equals
            ``(x_rot, y_rot, z_rot)`` instead of writing any data.
        x_rot, y_rot, z_rot: Rotation (degrees) looked for in test mode.

    Returns:
        The matching conformer in test mode, otherwise ``None``.
    """
    data_dict = {
        'protein': [],
        'target': [],
        'start': [],
        'num_conformers': [],
        'num_poses_searched': [],
        'num_correct_poses_found': [],
        'time_elapsed': [],
        'time_elapsed_per_conformer': [],
        'grid_loc_x': [],
        'grid_loc_y': [],
        'grid_loc_z': []
    }
    for grid_loc in grid:
        # Previously the pose counter was never incremented, so the
        # 'num_poses_searched' column was always 0.  It is now counted per
        # grid location, like the other per-row statistics.
        num_poses_searched = 0
        num_correct_found = 0
        # Re-read for every grid location: conformers are modified in place.
        conformer_file = os.path.join(
            pair_path, "aligned_to_start_without_hydrogen_conformers.mae")
        conformers = list(structure.StructureReader(conformer_file))
        decoy_start_time = time.time()
        for conformer in conformers:
            transform.translate_structure(conformer, grid_loc[0],
                                          grid_loc[1], grid_loc[2])
            conformer_center = list(get_centroid(conformer))
            # Every rotation starts from these translated coordinates.
            coords = conformer.getXYZ(copy=True)
            for x in range(-30, 30 + rotation_search_step_size,
                           rotation_search_step_size):
                for y in range(-30, 30 + rotation_search_step_size,
                               rotation_search_step_size):
                    for z in range(-30, 30 + rotation_search_step_size,
                                   rotation_search_step_size):
                        new_coords = rotate_structure(
                            coords, math.radians(x), math.radians(y),
                            math.radians(z), conformer_center)
                        conformer.setXYZ(new_coords)
                        if test and x_rot == x and y_rot == y and z_rot == z:
                            return conformer
                        rmsd_val = rmsd.calculate_in_place_rmsd(
                            conformer, conformer.getAtomIndices(),
                            target_lig, target_lig.getAtomIndices())
                        num_poses_searched += 1
                        if rmsd_val < cutoff:
                            num_correct_found += 1
        decoy_end_time = time.time()
        elapsed = decoy_end_time - decoy_start_time
        data_dict['protein'].append(protein)
        data_dict['target'].append(target)
        data_dict['start'].append(start)
        data_dict['num_conformers'].append(len(conformers))
        data_dict['num_poses_searched'].append(num_poses_searched)
        data_dict['num_correct_poses_found'].append(num_correct_found)
        data_dict['time_elapsed'].append(elapsed)
        # An empty conformer file used to raise ZeroDivisionError here.
        data_dict['time_elapsed_per_conformer'].append(
            elapsed / len(conformers) if conformers else 0.0)
        data_dict['grid_loc_x'].append(grid_loc[0])
        data_dict['grid_loc_y'].append(grid_loc[1])
        data_dict['grid_loc_z'].append(grid_loc[2])

    df = pd.DataFrame.from_dict(data_dict)
    save_folder = os.path.join(os.getcwd(), 'decoy_timing_data',
                               '{}_{}-to-{}'.format(protein, target, start))
    # makedirs(exist_ok=True) creates both levels and tolerates parallel
    # jobs creating the same folders at the same time.
    os.makedirs(save_folder, exist_ok=True)
    df.to_csv(os.path.join(save_folder, '{}.csv'.format(index)))
    return None
def time_conformer_decoys(pair_path, start_lig_center, target_lig, prot,
                          rotation_search_step_size):
    """Print average timings of the pose-generation and scoring primitives.

    Three passes are made, each over a freshly read copy of the conformer
    file in ``pair_path``:

    1. one ``transform.translate_structure`` and one
       ``transform.rotate_structure`` call per conformer are timed;
    2. the same pair of calls is timed for the module's own
       ``translate_structure`` / ``rotate_structure``;
    3. conformers are stepped through a nested x/y/z rotation sweep and, for
       every pose, ``steric_clash.clash_iterator``,
       ``steric_clash.clash_volume`` and ``rmsd.calculate_in_place_rmsd``
       are timed.  After exactly 1000 poses the averages are printed and the
       function returns.

    Args:
        pair_path: Directory holding the conformer file.
        start_lig_center: Point (indexable x, y, z) each conformer centroid
            is moved onto.
        target_lig: Structure used as the RMSD reference.
        prot: Structure passed as the first argument of the clash functions.
        rotation_search_step_size: Angle step in degrees.

    Returns:
        None.  If the sweep ends before 1000 poses were timed, the third set
        of averages is not printed.
    """
    # --- Pass 1: library translate / rotate -------------------------------
    translate_times = []
    rotate_times = []
    conformer_file = os.path.join(
        pair_path, "aligned_to_start_without_hydrogen_conformers.mae")
    conformers = list(structure.StructureReader(conformer_file))
    for conformer in conformers:
        conformer_center = list(get_centroid(conformer))
        # translation: centroid onto start_lig_center (grid offset is zero)
        grid_loc = [0, 0, 0]
        start = time.time()
        transform.translate_structure(
            conformer,
            start_lig_center[0] - conformer_center[0] + grid_loc[0],
            start_lig_center[1] - conformer_center[1] + grid_loc[1],
            start_lig_center[2] - conformer_center[2] + grid_loc[2])
        end = time.time()
        translate_times.append(end - start)
        # rotation
        # NOTE(review): conformer_center was computed before the translation
        # above, so the rotation is about the old centroid -- confirm that
        # this does not matter for a timing-only measurement.
        start = time.time()
        transform.rotate_structure(
            conformer, math.radians(-30 - rotation_search_step_size), 0, 0,
            conformer_center)
        end = time.time()
        rotate_times.append(end - start)
    print("Average schrodinger translate time =",
          statistics.mean(translate_times))
    print("Average schrodinger rotate time =", statistics.mean(rotate_times))

    # --- Pass 2: custom translate / rotate --------------------------------
    translate_times = []
    rotate_times = []
    conformer_file = os.path.join(
        pair_path, "aligned_to_start_without_hydrogen_conformers.mae")
    conformers = list(structure.StructureReader(conformer_file))
    for conformer in conformers:
        conformer_center = list(get_centroid(conformer))
        # translation
        grid_loc = [0, 0, 0]
        start = time.time()
        translate_structure(
            conformer,
            start_lig_center[0] - conformer_center[0] + grid_loc[0],
            start_lig_center[1] - conformer_center[1] + grid_loc[1],
            start_lig_center[2] - conformer_center[2] + grid_loc[2])
        end = time.time()
        translate_times.append(end - start)
        # rotation
        start = time.time()
        rotate_structure(conformer,
                         math.radians(-30 - rotation_search_step_size), 0, 0,
                         conformer_center)
        end = time.time()
        rotate_times.append(end - start)
    print("Average custom translate time =", statistics.mean(translate_times))
    print("Average custom rotate time =", statistics.mean(rotate_times))

    # --- Pass 3: clash / RMSD scoring over an incremental rotation sweep --
    clash_iterator_times = []
    clash_volume_times = []
    rmsd_times = []
    rotation_search_step_size_rad = math.radians(rotation_search_step_size)
    conformer_file = os.path.join(
        pair_path, "aligned_to_start_without_hydrogen_conformers.mae")
    conformers = list(structure.StructureReader(conformer_file))
    for conformer in conformers:
        conformer_center = list(get_centroid(conformer))
        # translation
        grid_loc = [0, 0, 0]
        translate_structure(
            conformer,
            start_lig_center[0] - conformer_center[0] + grid_loc[0],
            start_lig_center[1] - conformer_center[1] + grid_loc[1],
            start_lig_center[2] - conformer_center[2] + grid_loc[2])
        conformer_center = list(get_centroid(conformer))
        # keep track of rotation angles: start one step below -30 degrees in
        # x so that the first step of the x loop lands on -30
        rotate_structure(conformer,
                         math.radians(-30 - rotation_search_step_size), 0, 0,
                         conformer_center)
        x_so_far = -30 - rotation_search_step_size
        y_so_far = 0
        z_so_far = 0
        # NOTE(review): the *_so_far counters advance by 1 per step, not by
        # rotation_search_step_size; they only equal the applied angle when
        # the step size is 1 -- confirm.
        for _ in range(-30, 30, rotation_search_step_size):
            # x rotation: advance x by one step and rewind y to one step
            # below -30
            rotate_structure(
                conformer, rotation_search_step_size_rad,
                math.radians(-30 - rotation_search_step_size - y_so_far), 0,
                conformer_center)
            x_so_far += 1
            y_so_far += -30 - rotation_search_step_size - y_so_far
            for _ in range(-30, 30, rotation_search_step_size):
                # y rotation: advance y by one step and rewind z to one step
                # below -30
                rotate_structure(
                    conformer, 0, rotation_search_step_size_rad,
                    math.radians(-30 - rotation_search_step_size - z_so_far),
                    conformer_center)
                y_so_far += 1
                z_so_far += -30 - rotation_search_step_size - z_so_far
                for _ in range(-30, 30, rotation_search_step_size):
                    # z rotation
                    rotate_structure(conformer, 0, 0,
                                     rotation_search_step_size_rad,
                                     conformer_center)
                    z_so_far += 1
                    # get clash_iterator: time taking the maximum of the
                    # third field over all reported clashes
                    start = time.time()
                    max([
                        x[2] for x in list(
                            steric_clash.clash_iterator(prot,
                                                        struc2=conformer))
                    ])
                    end = time.time()
                    clash_iterator_times.append(end - start)
                    # get clash_volume
                    start = time.time()
                    steric_clash.clash_volume(prot, struc2=conformer)
                    end = time.time()
                    clash_volume_times.append(end - start)
                    # get rmsd
                    start = time.time()
                    rmsd.calculate_in_place_rmsd(conformer,
                                                 conformer.getAtomIndices(),
                                                 target_lig,
                                                 target_lig.getAtomIndices())
                    end = time.time()
                    rmsd_times.append(end - start)
                    # Stop after a fixed sample of 1000 poses.
                    if len(clash_iterator_times) == 1000:
                        print("Average clash iterator time =",
                              statistics.mean(clash_iterator_times))
                        print("Average clash volume time =",
                              statistics.mean(clash_volume_times))
                        print("Average rmsd time =",
                              statistics.mean(rmsd_times))
                        return
lig, 'C1NNC(C12)CCNC2') #getting the core indices of the ligand for index in indices: #because the nested list gives the rmsd calculation big problems...... lig_core = index xtal_lig = analyze.evaluate_asl( xtal, '(res.ptype "BC7 ")' ) #getting the indices of the coxtal ligand after superposition xtal_lig_struct = (struct._AtomCollection( xtal, xtal_lig)).extractStructure( ) #extracting the coxtal ligand as a separate structure indices_2 = analyze.evaluate_smarts_canvas( xtal_lig_struct, 'c1nnc(c12)CCNC2') #getting core indices of coxtal ligand for index in indices_2: xtal_core = index core_rmsd = rmsd.calculate_in_place_rmsd( xtal_lig_struct, xtal_core, lig, lig_core) #calculating rmsd of cores dataline = [ligand, cen_num, score, core_rmsd] data.append(dataline) data.sort(key=lambda x: x[2]) print(data[0]) for line in data: newline = line[0] + ',' + str(line[1]) + ',' + str( line[2]) + ',' + str(line[3]) + '\n' datafile.write(newline)
def compute_protein_rmsds(paired_strs, protein, start, target, s1, s2, chains,
                          protein_folder):
    """Compute per-residue in-place RMSDs between two aligned structures.

    Residues near the ligand in either pocket file are collected.  A residue
    is kept when both aligned sequence strings carry the same character at
    its alignment index, and its counterpart in the other structure is kept
    with it.  For each kept residue pair whose atom lists have equal length,
    the in-place RMSD between ``s1`` and ``s2`` is computed.

    Args:
        paired_strs: Nested mapping ``[protein][start][target]`` giving the
            two aligned sequence strings.
        protein: Protein name.
        start: Name of the start ligand (belongs to ``s1``).
        target: Name of the target ligand (belongs to ``s2``).
        s1: Structure for ``start``.
        s2: Structure for ``target``.
        chains: Passed through to ``get_all_res``.
        protein_folder: Directory holding ``<ligand>_pocket.mae`` files.

    Returns:
        List of RMSD values, or ``None`` (after printing the reason) when
        either structure has no residues near the ligand or no ligand.
    """
    (paired_str_s1, paired_str_s2) = paired_strs[protein][start][target]
    r_list_s1 = get_all_res(s1, chains, protein)
    r_list_s2 = get_all_res(s2, chains, protein)
    r_to_i_map_s1 = map_residues_to_align_index(paired_str_s1, r_list_s1)
    r_to_i_map_s2 = map_residues_to_align_index(paired_str_s2, r_list_s2)
    i_to_r_map_s1 = inv_map(r_to_i_map_s1)
    i_to_r_map_s2 = inv_map(r_to_i_map_s2)

    pocket_file_s1 = os.path.join(protein_folder, start + '_pocket.mae')
    pocket_file_s2 = os.path.join(protein_folder, target + '_pocket.mae')
    pocket_s1 = list(structure.StructureReader(pocket_file_s1))[0]
    pocket_s2 = list(structure.StructureReader(pocket_file_s2))[0]
    valid_r_s1 = get_res_near_ligand(r_to_i_map_s1, pocket_s1)
    valid_r_s2 = get_res_near_ligand(r_to_i_map_s2, pocket_s2)

    # An empty set means no residue is near the ligand; 0 is reported as
    # "no ligand" by the messages below.  Each message now names the ligand
    # whose structure failed: two of the four previously printed the other
    # ligand's name.
    for valid_r, ligand in ((valid_r_s1, start), (valid_r_s2, target)):
        if valid_r == set():
            print(protein, ligand, "no residues close to the ligand")
            return
        if valid_r == 0:
            print(protein, ligand, "pose viewer file has no ligand")
            return

    print("Calculating")
    final_r_list_s1 = []
    final_r_list_s2 = []
    for r in valid_r_s1:
        s1index = r_to_i_map_s1[r]
        if paired_str_s1[s1index] == paired_str_s2[s1index]:
            if r not in final_r_list_s1:
                final_r_list_s1.append(r)
            if i_to_r_map_s2[s1index] not in final_r_list_s2:
                final_r_list_s2.append(i_to_r_map_s2[s1index])
    for r in valid_r_s2:
        s2index = r_to_i_map_s2[r]
        if paired_str_s2[s2index] == paired_str_s1[s2index]:
            if r not in final_r_list_s2:
                final_r_list_s2.append(r)
            if i_to_r_map_s1[s2index] not in final_r_list_s1:
                final_r_list_s1.append(i_to_r_map_s1[s2index])

    (asl_list_s1, a_list_s1) = get_atoms(s1, final_r_list_s1)
    (asl_list_s2, a_list_s2) = get_atoms(s2, final_r_list_s2)

    rmsd_ls = []
    for atoms_s1, atoms_s2 in zip(a_list_s1, a_list_s2):
        # Residue pairs with different atom counts cannot be compared.
        if len(atoms_s1) == len(atoms_s2):
            rmsd_ls.append(
                rmsd.calculate_in_place_rmsd(s1, atoms_s1, s2, atoms_s2))
    return rmsd_ls
def _interaction_overlaps(structs, find_interactions, atom_indices):
    """Score each structure's interaction pattern against the first one's.

    ``find_interactions(struct, atom_indices)`` must return a sequence of
    atom pairs.  Each pair is reduced to a tuple of its two atom indices,
    and every structure's list of tuples is compared with the first
    structure's list via ``difflib.SequenceMatcher``.  Returns the ratios
    rounded to 5 decimals, one per structure.
    """
    patterns = [[(pair[0].index, pair[1].index)
                 for pair in find_interactions(st, atom_indices)]
                for st in structs]
    reference = patterns[0]
    return [
        round(difflib.SequenceMatcher(None, reference, pattern).ratio(), 5)
        for pattern in patterns
    ]


def rmsdRef():
    """Print loop RMSD and interaction overlap relative to the best result.

    Reads ``v92.finalResult`` (skipping the header line; seed in column 0,
    energy in column 8, native RMSD in column 9) and sorts the entries by
    energy.  The subjob pattern is the third field of the last line of
    ``v92.con`` containing ``subjob_control``.  For every ``subJobs/<seed>_*``
    directory that contains ``plop.stdout`` the matching template structure
    is loaded.  Each structure is superimposed on the lowest-energy one
    using the non-loop atoms, and a table of seed, loop-environment RMSD,
    hydrogen-bond overlap, salt-bridge overlap and energy is printed.

    Returns:
        None.  Prints a message and returns early when there is nothing to
        compare.
    """
    with open('v92.finalResult', 'r') as f:
        result_lines = f.readlines()[1:]

    # (energy, seed, nativeRMSD) tuples so that sorting orders by energy.
    results = []
    for line in result_lines:
        terms = line.split()
        if not terms:
            # Tolerate blank lines, e.g. a trailing newline.
            continue
        results.append((float(terms[8]), int(terms[0]), terms[9]))
    results.sort()
    if not results:
        print('No results found in v92.finalResult')
        return

    cwd = os.getcwd()
    pattern = None
    with open('v92.con', 'r') as f:
        for line in f:
            if 'subjob_control' in line:
                pattern = line.split()[2]

    # Keep seed and energy with each loaded structure.  The table was
    # previously indexed by position in the seed list, which misreported
    # seeds and energies as soon as one seed had no loaded structure or
    # more than one.
    sub_jobs_root = os.path.join(cwd, 'subJobs')
    job_dirs = os.listdir(sub_jobs_root)
    loaded = []
    for energy, seed, native_rmsd in results:
        for job_dir in job_dirs:
            if job_dir.split('_')[0] != str(seed):
                continue
            job_path = os.path.join(sub_jobs_root, job_dir)
            # Absolute paths replace the former os.chdir round-trip, which
            # left the process in the subjob directory if reading failed.
            if not os.path.exists(os.path.join(job_path, 'plop.stdout')):
                continue
            st_name = ('4KUZ-p' + str(pattern) + '-' + native_rmsd +
                       '_template.maegz')
            st = next(structure.StructureReader(
                os.path.join(job_path, st_name)))
            loaded.append((seed, energy, st))
    if not loaded:
        print('No finished subjob structures found')
        return

    structs = [entry[2] for entry in loaded]
    # The lowest-energy structure is the reference; copy it so that it is
    # not also the structure being moved in the first iteration.
    minStruct = copy.deepcopy(structs[0])
    loop_env_indices = analyze.evaluate_asl(minStruct, LOOPENVINDICES_asl)
    non_loop_indices = analyze.evaluate_asl(minStruct, NONLOOPINDICES_asl)

    rmsds = []
    for curStruct in structs:
        rmsd.superimpose(minStruct, non_loop_indices, curStruct,
                         non_loop_indices)
        rmsds.append(
            rmsd.calculate_in_place_rmsd(minStruct, loop_env_indices,
                                         curStruct, loop_env_indices))

    hbond_overlaps = _interaction_overlaps(structs,
                                           hbond.get_hydrogen_bonds,
                                           loop_env_indices)
    salt_bridge_overlaps = _interaction_overlaps(
        structs, salt_bridge.get_salt_bridges, loop_env_indices)

    print('SEED\t\tRMSD\t\tHBOND_OVERLAP\tSALTBR_OVERLAP\tENERGY')
    for (seed, energy, _), loop_rmsd, hb_overlap, sb_overlap in zip(
            loaded, rmsds, hbond_overlaps, salt_bridge_overlaps):
        print(
            str(seed) + '\t\t' + str(round(loop_rmsd, 3)) + '\t\t' +
            str(hb_overlap * 100) + '\t\t' + str(sb_overlap * 100) +
            '\t\t' + str(energy))
def compute_protein_rmsds(protein, rmsd_file, combind_root):
    """Write per-residue features and RMSDs for every ordered ligand pair.

    For each ordered pair ``(start, target)`` of distinct ligands of
    ``protein`` the two aligned structures are read.  Residues near either
    ligand whose aligned sequence characters agree are paired up, and for
    every pair with equal atom counts one CSV row is written: the residue's
    16 stored features, four ligand similarity metrics, and the complete,
    backbone and side-chain in-place RMSDs.

    Args:
        protein: Protein name; used to locate the MCSS table, alignment
            pickle, feature pickles and structure files.
        rmsd_file: Path of the CSV file to create (overwritten).
        combind_root: Path prefix that is string-concatenated with
            ``<protein>/structures/aligned_files/...``.

    Returns:
        None.
    """
    mcss_data = pd.read_csv(
        "../../similarity/Data/mcss/{}_mcss.csv".format(protein))

    # newline='' is what the csv module requires of files it writes to;
    # without it rows gain an extra blank line on platforms that translate
    # '\n' on write.
    with open(rmsd_file, 'w', newline='') as csvFile:
        writer = csv.writer(csvFile)
        writer.writerow([
            'protein', 'start ligand', 'target ligand', 'name', 'num',
            'bfactor', 'normalized bfactor', 'prev prev bfactor',
            'prev bfactor', 'next bfactor', 'next next bfactor', 'mol weight',
            'general number of rotamers', 'general avg rmsd of rotamers',
            'specific number of rotamers', 'specific avg rmsd of rotamers',
            'packing', 'solvent accessibility', 'secondary structure',
            'ligand similarity', 'ligand similarity ratio',
            'ligand size difference', 'ligand size ratio', 'complete rmsd',
            'backbone rmsd', 'sidechain rmsd'
        ])
        ligands = get_ligands(protein, max_ligands, combind_root)

        # NOTE: pickle.load executes arbitrary code from the file; these
        # paths must only ever point at project-generated data.
        alignment_path = (
            '../../protein_flexibility/Data/alignments/{}_alignment.pkl'.
            format(protein))
        with open(alignment_path, 'rb') as infile:
            paired_strs = pickle.load(infile)

        for start in ligands:
            ASL_to_feature_path = ('../Data/feature_vectors_packing/' +
                                   protein + '/' + start + '.pkl')
            if not os.path.exists(ASL_to_feature_path):
                # Report the missing feature file and skip this ligand.
                print(ASL_to_feature_path)
                continue
            with open(ASL_to_feature_path, 'rb') as infile:
                ASL_to_feature = pickle.load(infile)
            print('Start', start)
            ending_1 = '{}/structures/aligned_files/{}/{}_out.mae'.format(
                protein, start, start)
            s1 = list(structure.StructureReader(combind_root + ending_1))[0]

            for target in ligands:
                if start == target:
                    continue
                ending_2 = '{}/structures/aligned_files/{}/{}_out.mae'.format(
                    protein, target, target)
                s2 = list(
                    structure.StructureReader(combind_root + ending_2))[0]
                # Alignments are stored once per unordered pair, keyed with
                # the smaller name first; swap the strings when needed.
                if start < target:
                    (paired_str_s1, paired_str_s2) = paired_strs[start][target]
                else:
                    (paired_str_s2, paired_str_s1) = paired_strs[target][start]
                (ligSim, ligSimRatio, ligSizeDiff,
                 ligSizeRatio) = ligSimMetrics(start, target, mcss_data)

                r_list_s1 = get_all_res(s1)
                r_list_s2 = get_all_res(s2)
                r_to_i_map_s1 = map_residues_to_align_index(
                    paired_str_s1, r_list_s1)
                r_to_i_map_s2 = map_residues_to_align_index(
                    paired_str_s2, r_list_s2)
                i_to_r_map_s1 = inv_map(r_to_i_map_s1)
                i_to_r_map_s2 = inv_map(r_to_i_map_s2)
                valid_r_s1 = get_res_near_ligand(s1, r_to_i_map_s1)
                valid_r_s2 = get_res_near_ligand(s2, r_to_i_map_s2)

                # Each message names the ligand whose structure failed; two
                # of the four previously printed the other ligand's name.
                if valid_r_s1 == set():
                    print(protein, start, "no residues close to the ligand")
                    continue
                if valid_r_s1 == 0:
                    print(protein, start, "pose viewer file has no ligand")
                    continue
                if valid_r_s2 == set():
                    print(protein, target, "no residues close to the ligand")
                    continue
                if valid_r_s2 == 0:
                    print(protein, target, "pose viewer file has no ligand")
                    continue

                final_r_list_s1 = []
                final_r_list_s2 = []
                for r in valid_r_s1:
                    s1index = r_to_i_map_s1[r]
                    if paired_str_s1[s1index] == paired_str_s2[s1index]:
                        if r not in final_r_list_s1:
                            final_r_list_s1.append(r)
                        if i_to_r_map_s2[s1index] not in final_r_list_s2:
                            final_r_list_s2.append(i_to_r_map_s2[s1index])
                for r in valid_r_s2:
                    s2index = r_to_i_map_s2[r]
                    if paired_str_s2[s2index] == paired_str_s1[s2index]:
                        if r not in final_r_list_s2:
                            final_r_list_s2.append(r)
                        if i_to_r_map_s1[s2index] not in final_r_list_s1:
                            final_r_list_s1.append(i_to_r_map_s1[s2index])

                (asl_list_s1, a_list_s1, backbone_a_list_s1,
                 sidechain_a_list_s1) = get_atoms(s1, final_r_list_s1)
                (asl_list_s2, a_list_s2, backbone_a_list_s2,
                 sidechain_a_list_s2) = get_atoms(s2, final_r_list_s2)

                for k in range(len(a_list_s1)):
                    # Residue pairs with different atom counts are skipped.
                    if len(a_list_s1[k]) != len(a_list_s2[k]):
                        continue
                    rmsd_val = rmsd.calculate_in_place_rmsd(
                        s1, a_list_s1[k], s2, a_list_s2[k])
                    backbone_rmsd_val = rmsd.calculate_in_place_rmsd(
                        s1, backbone_a_list_s1[k], s2, backbone_a_list_s2[k])
                    sidechain_rmsd_val = rmsd.calculate_in_place_rmsd(
                        s1, sidechain_a_list_s1[k], s2,
                        sidechain_a_list_s2[k])
                    feature = ASL_to_feature[asl_list_s1[k]]
                    # Features are stored under the start structure's ASL;
                    # the first 16 entries fill the feature columns.
                    writer.writerow(
                        [protein, start, target] +
                        [feature[i] for i in range(16)] + [
                            ligSim, ligSimRatio, ligSizeDiff, ligSizeRatio,
                            rmsd_val, backbone_rmsd_val, sidechain_rmsd_val
                        ])
def create_conformer_decoys(save_path, run_path, conformers, grid,
                            num_jobs_submitted, start_lig_center, target_lig,
                            prot, min_angle, max_angle, rmsd_cutoff, protein,
                            target, start, index):
    """Randomly sample poses until a correct one is found or options run out.

    Each iteration picks a random conformer and a random grid entry, moves
    the conformer's centroid to ``start_lig_center`` plus the grid offset,
    and rotates it.  The rotation is uniformly random while more than one
    grid entry remains, otherwise it is drawn from a discrete list of
    whole-degree rotations.  A pose with clash volume below 200 counts as
    valid; a valid pose whose in-place RMSD to ``target_lig`` is below
    ``rmsd_cutoff`` ends the search.

    Every grid entry, conformer and discrete rotation carries a miss
    counter.  After five consecutive iterations without a valid pose the
    entry with the highest counter is removed: grid entries first, then
    conformers and rotations once a single grid entry is left.

    The loop ends when
      * a correct pose is found (counts are appended to the progress file
        and written to ``save_path`` if no result file exists yet),
      * one grid entry, one conformer and one rotation remain and five more
        iterations fail ("No correct poses found" is recorded), or
      * at a multiple of 1000 poses, ``get_jobs_in_queue`` reports a value
        different from ``num_jobs_submitted``.

    Args:
        save_path: Directory for the final ``<protein>_<target>_<start>.txt``.
        run_path: Directory for the progress file
            ``<protein>_<target>_<start>_<index>.txt``.
        conformers: Iterable of conformer structures; transformed in place.
        grid: List of ``[location, counter]`` entries, where ``location``
            has three components; this list is mutated (entries removed,
            counters updated).
        num_jobs_submitted: Expected job count in the queue.
        start_lig_center: Point (indexable x, y, z) used as translation
            target.
        target_lig: Reference structure for the RMSD.
        prot: Structure used for the clash-volume check.
        min_angle, max_angle: Rotation bounds; converted with
            ``math.degrees`` for the discrete list, so they are in radians.
        rmsd_cutoff: RMSD strictly below this counts as correct.
        protein, target, start: Names used in file names and the queue query
            (first character of each).
        index: Used only in the progress file name; the name is reused
            inside the loop for the chosen grid position.

    Returns:
        None.
    """
    # Each conformer and rotation is paired with its miss counter.
    conformer_ls = [[c, 0] for c in conformers]
    rot_ls = []
    for rot_x in range(int(math.degrees(min_angle)),
                       int(math.degrees(max_angle)) + 1):
        for rot_y in range(int(math.degrees(min_angle)),
                           int(math.degrees(max_angle)) + 1):
            for rot_z in range(int(math.degrees(min_angle)),
                               int(math.degrees(max_angle)) + 1):
                rot_ls.append([[
                    math.radians(rot_x),
                    math.radians(rot_y),
                    math.radians(rot_z)
                ], 0])
    output_file = os.path.join(
        run_path, '{}_{}_{}_{}.txt'.format(protein, target, start, index))
    num_iter_without_pose = 0
    num_valid_poses = 0
    num_total_poses = 0
    while True:
        num_iter_without_pose += 1
        num_total_poses += 1
        # Every 1000 poses: log progress and stop if the queue count differs
        # from the number of submitted jobs.
        if num_total_poses % 1000 == 0:
            num_jobs_in_queue = get_jobs_in_queue('{}{}{}'.format(
                protein[0], target[0], start[0]))
            f = open(output_file, "a")
            f.write(
                "num_total_poses: {}, len(grid): {}, len(conformer_ls): {}, len(rot_ls): {}, num_jobs_in_queue: "
                "{}\n".format(num_total_poses, len(grid), len(conformer_ls),
                              len(rot_ls), num_jobs_in_queue))
            f.close()
            if num_jobs_in_queue != num_jobs_submitted:
                break
        conformer_index = random.randint(0, len(conformer_ls) - 1)
        conformer = conformer_ls[conformer_index][0]
        conformer_center = list(get_centroid(conformer))
        # translation: centroid onto start_lig_center plus the grid offset.
        # From here on ``index`` is the chosen grid position, not the
        # ``index`` argument.
        index = random.randint(0, len(grid) - 1)
        grid_loc = grid[index][0]
        transform.translate_structure(
            conformer,
            start_lig_center[0] - conformer_center[0] + grid_loc[0],
            start_lig_center[1] - conformer_center[1] + grid_loc[1],
            start_lig_center[2] - conformer_center[2] + grid_loc[2])
        conformer_center = list(get_centroid(conformer))
        # rotation: continuous while several grid entries remain, discrete
        # (and tracked per rotation) once only one is left
        if len(grid) > 1:
            x_angle = np.random.uniform(min_angle, max_angle)
            y_angle = np.random.uniform(min_angle, max_angle)
            z_angle = np.random.uniform(min_angle, max_angle)
        else:
            rot_index = random.randint(0, len(rot_ls) - 1)
            x_angle, y_angle, z_angle = rot_ls[rot_index][0]
        # NOTE(review): conformers are transformed in place and never reset,
        # so the rotations accumulate across iterations that pick the same
        # conformer -- confirm this is intended.
        transform.rotate_structure(conformer, x_angle, y_angle, z_angle,
                                   conformer_center)
        if steric_clash.clash_volume(prot, struc2=conformer) < 200:
            # Valid (low-clash) pose.
            num_valid_poses += 1
            if rmsd.calculate_in_place_rmsd(
                    conformer, conformer.getAtomIndices(), target_lig,
                    target_lig.getAtomIndices()) < rmsd_cutoff:
                # Correct pose found: record the counts and stop.
                save_file = os.path.join(
                    save_path, '{}_{}_{}.txt'.format(protein, target, start))
                f = open(output_file, "a")
                f.write("Num poses searched = {}\n".format(num_total_poses))
                f.write("Num acceptable clash poses searched = {}\n".format(
                    num_valid_poses))
                f.close()
                # Only write the result file if none exists yet.
                if not os.path.exists(save_file):
                    with open(save_file, 'w') as f:
                        f.write("Num poses searched = {}\n".format(
                            num_total_poses))
                        f.write("Num acceptable clash poses searched = {}\n".
                                format(num_valid_poses))
                break
            # Valid but not correct: reset this grid entry's miss counter.
            grid[index][1] = 0
            num_iter_without_pose = 0
        elif num_iter_without_pose == 5 and len(grid) > 1:
            # Five misses in a row: drop the grid entry with most misses.
            max_val = max(grid, key=lambda x: x[1])
            grid.remove(max_val)
            num_iter_without_pose = 0
        elif num_iter_without_pose == 5 and len(grid) == 1:
            if len(conformer_ls) == 1 and len(rot_ls) == 1:
                # Nothing left to prune: give up and record the failure.
                save_file = os.path.join(
                    save_path, '{}_{}_{}.txt'.format(protein, target, start))
                f = open(output_file, "a")
                f.write("Num poses searched = {}\n".format(num_total_poses))
                f.write("Num acceptable clash poses searched = {}\n".format(
                    num_valid_poses))
                f.write("No correct poses found\n")
                f.close()
                if not os.path.exists(save_file):
                    with open(save_file, 'w') as f:
                        f.write("Num poses searched = {}\n".format(
                            num_total_poses))
                        f.write("Num acceptable clash poses searched = {}\n".
                                format(num_valid_poses))
                        f.write("No correct poses found\n")
                break
            elif len(conformer_ls) > 1 and (len(rot_ls) == 1 or
                                            (len(conformer_ls) + len(rot_ls))
                                            % 2 == 0):
                # Prune a conformer when rotations cannot be pruned or the
                # combined length is even; a rotation is pruned otherwise.
                max_val = max(conformer_ls, key=lambda x: x[1])
                conformer_ls.remove(max_val)
            else:
                max_val = max(rot_ls, key=lambda x: x[1])
                rot_ls.remove(max_val)
            num_iter_without_pose = 0
        else:
            # Ordinary miss: charge every option that took part in it.
            grid[index][1] += 1
            conformer_ls[conformer_index][1] += 1
            if len(grid) == 1:
                rot_ls[rot_index][1] += 1