Beispiel #1
0
 def write_grid_in_file(self,
                        mode,
                        grid_ligand='',
                        xyz=False,
                        receptor_file=''):
     """Write the grid input file for this object.

     The grid centre depends on ``mode``:

     * ``'other_ligand'`` -- centroid of the first structure read from
       ``grid_ligand``
     * ``'xyz'`` -- the three values unpacked from ``xyz``
     * anything else -- centroid of the first structure read from
       ``self.path + self.split_ligand``

     :param mode: (string) selects how the grid centre is obtained
     :param grid_ligand: (string) structure file used when mode is
         'other_ligand'
     :param xyz: three-element sequence used when mode is 'xyz'
     :param receptor_file: (string) value written as RECEP_FILE; when it is
         the empty string ``self.split_protein`` is written instead
     :return: None, the text is written to ``self.path + self.grid_in``
     """
     if mode == 'xyz':
         x, y, z = xyz
     else:
         if mode == 'other_ligand':
             ligand_source = grid_ligand
         else:
             ligand_source = self.path + self.split_ligand
         centroid = get_centroid(next(StructureReader(ligand_source)))
         x, y, z = centroid[:3]

     use_file = self.split_protein if receptor_file == '' else receptor_file

     with open(self.path + self.grid_in, 'w') as f:
         f.write(''.join([
             'GRID_CENTER {},{},{}\n'.format(x, y, z),
             'GRIDFILE {}\n'.format(self.grid_file),
             'INNERBOX 15,15,15\n',
             'OUTERBOX 30,30,30\n',
             'RECEP_FILE {}\n'.format(use_file),
         ]))
def create_conformer_decoys(conformers, grid_size, start_lig_center, prot,
                            pose_path, target, max_poses, min_angle,
                            max_angle):
    """
    writes randomly translated/rotated conformer poses until the pose counter reaches max_poses
    :param conformers: (list) structures that poses are randomly drawn from
    :param grid_size: (int) integer offsets in [-grid_size, grid_size) are used on each axis
    :param start_lig_center: (list) reference coordinates; the drawn conformer is translated by
        (start_lig_center - its centroid + grid offset)
    :param prot: structure the pose is clash-checked against
    :param pose_path: (string) directory where pose files are written
    :param target: (string) prefix of the pose file names
    :param max_poses: (int) the loop stops once the pose counter reaches this value
    :param min_angle: (float) lower bound of the per-axis rotation angle
    :param max_angle: (float) upper bound of the per-axis rotation angle
    :return:
    """
    offsets = range(-grid_size, grid_size)
    # every cell is [offset vector, number of rejected poses drawn at that offset]
    grid = [[[dx, dy, dz], 0] for dx in offsets for dy in offsets
            for dz in offsets]

    # numbering starts at 1; '{target}_lig0.mae' is referenced below as a separate file
    num_valid_poses = 1
    failed_streak = 0

    while num_valid_poses < max_poses:
        failed_streak += 1
        conformer = random.choice(conformers)
        center = list(get_centroid(conformer))

        # translation: shift by (start_lig_center - centroid + grid offset)
        index = random.randint(0, len(grid) - 1)
        grid_loc = grid[index][0]
        shift = [
            start_lig_center[axis] - center[axis] + grid_loc[axis]
            for axis in range(3)
        ]
        transform.translate_structure(conformer, shift[0], shift[1], shift[2])
        center = list(get_centroid(conformer))

        # rotation: one independent uniform draw per axis, about the new centroid
        angles = [np.random.uniform(min_angle, max_angle) for _ in range(3)]
        transform.rotate_structure(conformer, angles[0], angles[1], angles[2],
                                   center)

        clash = steric_clash.clash_volume(prot, struc2=conformer)
        if clash < 200:
            decoy_file = os.path.join(
                pose_path, "{}_lig{}.mae".format(target, num_valid_poses))
            with structure.StructureWriter(decoy_file) as decoy:
                decoy.append(conformer)
            modify_file(decoy_file, '_pro_ligand')
            modify_file(decoy_file, '{}_lig0.mae'.format(target))
            num_valid_poses += 1
            grid[index][1] = 0
            failed_streak = 0
            continue

        if failed_streak == 5 and len(grid) > 1:
            # after five rejected draws in a row, drop the cell with the most rejections
            worst = max(grid, key=lambda cell: cell[1])
            grid.remove(worst)
            failed_streak = 0
        else:
            grid[index][1] += 1
def main():
    """
    prints (file, centroid, score) tuples for the first non-comment entry of docked_prot_file

    Each processed line is expected to hold three whitespace-separated fields
    (protein, target, start). Lines starting with '#' are skipped, and the
    loop stops after the first line that is processed.
    :return:
    """
    parser = argparse.ArgumentParser()
    for arg_name, arg_help in (
            ('docked_prot_file', 'file listing proteins to process'),
            ('raw_root', 'directory where raw data will be placed')):
        parser.add_argument(arg_name, type=str, help=arg_help)
    args = parser.parse_args()

    docking_config = []
    scores = []

    with open(args.docked_prot_file) as fp:
        for line in fp:
            if line.startswith('#'):
                continue
            protein, target, start = line.strip().split()
            pair = '{}-to-{}'.format(target, start)
            run_name = '{}-to-{}_cartesian'.format(target, start)
            pair_path = os.path.join(args.raw_root, protein, pair)
            pose_path = os.path.join(pair_path, 'cartesian_ligand_poses')

            config_entry = {
                'folder': pair_path,
                'name': run_name,
                'grid_file': os.path.join(pair_path, '{}-to-{}.zip'.format(target, start)),
                'prepped_ligand_file': os.path.join(
                    pair_path, '{}-to-{}_cartesian_merge_pv.mae'.format(target, start)),
                'glide_settings': {'num_poses': 1, 'docking_method': 'inplace'},
            }
            docking_config.append(config_entry)

            dock_set = Docking_Set()
            results = dock_set.get_docking_gscores(docking_config, mode='multi')
            results_by_ligand = results[run_name]
            for file in results_by_ligand:
                pose_file = os.path.join(pose_path, file)
                s = list(structure.StructureReader(pose_file))[0]
                entry = (file, get_centroid(s),
                         score_no_vdW(results_by_ligand[file][0]))
                scores.append(entry)
                # the whole accumulated list is printed after every pose
                print(scores)
            # only the first non-comment line is handled
            break
def create_decoys(lig_file):
    """
    writes MAX_DECOYS randomly translated and rotated copies of one ligand pose
    :param lig_file: (string) file of glide ligand pose that will be translated/rotated;
        decoys are written next to it with a letter ('a', 'b', ...) appended to the stem
    :return:
    """
    lig_name = lig_file.split('/')[-1]
    is_lig0 = lig_name.split('_')[-1] == 'lig0.mae'
    modify_file(lig_file, '_pro_ligand' if is_lig0 else '_ligand')

    stem = lig_file[:-4]
    for i in range(MAX_DECOYS):
        # re-read the pose so every decoy starts from the unmodified structure
        s = list(structure.StructureReader(lig_file))[0]

        # translation: direction vector scaled by a normally distributed distance
        x, y, z = random_three_vector()
        dist = np.random.normal(MEAN_TRANSLATION, STDEV_TRANSLATION)
        transform.translate_structure(s, x * dist, y * dist, z * dist)

        # rotation: one uniform draw per axis, about the translated centroid
        x_angle = np.random.uniform(MIN_ANGLE, MAX_ANGLE)
        y_angle = np.random.uniform(MIN_ANGLE, MAX_ANGLE)
        z_angle = np.random.uniform(MIN_ANGLE, MAX_ANGLE)
        transform.rotate_structure(s, x_angle, y_angle, z_angle,
                                   list(get_centroid(s)))

        decoy_file = '{}{}.mae'.format(stem, chr(ord('a') + i))
        with structure.StructureWriter(decoy_file) as decoy:
            decoy.append(s)
        # the same call is made whether or not the pose is lig0
        modify_file(decoy_file, lig_name)
def run_group_dist_check(grouped_files, raw_root, index, dist_dir, max_poses,
                         max_decoys):
    """
    checks mean distance of displacement for decoys for each protein, target, start group
    :param grouped_files: (list) list of protein, target, start groups
    :param raw_root: (string) directory where raw data will be placed
    :param index: (int) group number
    :param dist_dir: (string) directory to place distances
    :param max_poses: (int) maximum number of glide poses considered
    :param max_decoys: (int) maximum number of decoys created per glide pose
    :return: None; the per-pair means are pickled to <dist_dir>/<index>.pkl and printed
    """
    save = []
    for protein, target, start in grouped_files[index]:
        pair_path = os.path.join(raw_root, protein,
                                 '{}-to-{}'.format(target, start))
        pose_path = os.path.join(pair_path, 'ligand_poses')
        pv_file = os.path.join(pair_path,
                               '{}-to-{}_pv.maegz'.format(target, start))
        num_poses = len(list(structure.StructureReader(pv_file)))
        means = []

        for i in range(num_poses):
            if i == max_poses:
                break
            lig_file = os.path.join(pose_path,
                                    '{}_lig{}.mae'.format(target, i))
            s = list(structure.StructureReader(lig_file))[0]
            c = get_centroid(s)
            dists = []

            # decoy files carry a letter suffix: <pose>a.mae, <pose>b.mae, ...
            for j in range(max_decoys):
                decoy_file = lig_file[:-4] + chr(ord('a') + j) + '.mae'
                decoy = list(structure.StructureReader(decoy_file))[0]
                dists.append(
                    transform.get_vector_magnitude(c - get_centroid(decoy)))

            means.append(statistics.mean(dists))

        save.append(statistics.mean(means))

    # context manager guarantees the pickle is flushed and the handle closed
    with open(os.path.join(dist_dir, '{}.pkl'.format(index)), 'wb') as outfile:
        pickle.dump(save, outfile)
    print(save)
def run_search(protein, target, start, index, raw_root, get_time, cutoff,
               rotation_search_step_size, grid, no_prot_h, pocket_only,
               test=False, x_rot=0, y_rot=0, z_rot=0):
    """
    loads the structures of one protein, target, start pair and runs either the timing routine or the search
    :param protein: (string) protein directory name under raw_root
    :param target: (string) target code used in file and directory names
    :param start: (string) start code used in file and directory names
    :param index: (int) forwarded to create_conformer_decoys
    :param raw_root: (string) directory containing the protein directories
    :param get_time: (bool) if true, call time_conformer_decoys instead of the search
    :param cutoff: forwarded to create_conformer_decoys
    :param rotation_search_step_size: forwarded to the routine that is run
    :param grid: forwarded to create_conformer_decoys
    :param no_prot_h: (bool) if true, hydrogens are deleted from the protein structure
    :param pocket_only: (bool) if true, get_pocket_res(prot, target_lig, 6) is called on the protein
    :param test: forwarded to create_conformer_decoys
    :param x_rot: forwarded to create_conformer_decoys
    :param y_rot: forwarded to create_conformer_decoys
    :param z_rot: forwarded to create_conformer_decoys
    :return: the value returned by create_conformer_decoys, or None when get_time is set
    """
    def first_structure(path):
        # only the first structure in each file is used
        return list(structure.StructureReader(path))[0]

    pair_path = os.path.join(raw_root, protein,
                             '{}-to-{}'.format(target, start))

    start_lig = first_structure(
        os.path.join(pair_path, '{}_lig.mae'.format(start)))
    target_lig = first_structure(
        os.path.join(pair_path, 'ligand_poses', '{}_lig0.mae'.format(target)))
    build.delete_hydrogens(target_lig)
    start_lig_center = list(get_centroid(start_lig))

    prot = first_structure(
        os.path.join(pair_path, '{}_prot.mae'.format(start)))
    # atom count is printed before and after each optional reduction step
    print(prot.atom_total)
    if pocket_only:
        get_pocket_res(prot, target_lig, 6)
        print(prot.atom_total)
    if no_prot_h:
        build.delete_hydrogens(prot)
        print(prot.atom_total)

    if get_time:
        time_conformer_decoys(pair_path, start_lig_center, target_lig, prot,
                              rotation_search_step_size)
        return None

    return create_conformer_decoys(grid, target_lig, cutoff,
                                   rotation_search_step_size, protein, target,
                                   start, index, pair_path, test, x_rot,
                                   y_rot, z_rot)
def run_test_search(protein, target, start, raw_root, cutoff,
                    rotation_search_step_size, pair_path, no_prot_h,
                    pocket_only, get_time):
    """
    compares the pose returned by run_search in test mode with an independently transformed conformer

    A random rotation (from the first five angles of the search range) and a
    random integer grid offset in [-6, 6] are drawn. run_search is called in
    test mode with them, the same transform is applied to the first conformer
    of the conformer file, and the in-place rmsd between the two is printed.
    :param protein: (string) forwarded to run_search
    :param target: (string) forwarded to run_search
    :param start: (string) forwarded to run_search
    :param raw_root: (string) forwarded to run_search
    :param cutoff: forwarded to run_search
    :param rotation_search_step_size: (int) angle step in degrees, used as a range() step
    :param pair_path: (string) directory containing aligned_to_start_without_hydrogen_conformers.mae
    :param no_prot_h: forwarded to run_search
    :param pocket_only: forwarded to run_search
    :param get_time: forwarded to run_search
    :return:
    """
    step = rotation_search_step_size
    angles = list(range(-30, 30 + step, step))[:5]
    x_rot, y_rot, z_rot = [random.choice(angles) for _ in range(3)]

    grid_points = list(range(-6, 7))
    grid = [[random.choice(grid_points) for _ in range(3)]]

    conformer = run_search(protein, target, start, 0, raw_root, get_time,
                           cutoff, step, grid, no_prot_h, pocket_only, True,
                           x_rot, y_rot, z_rot)

    # rebuild the expected pose from the first conformer in the file
    reference = list(
        structure.StructureReader(
            os.path.join(
                pair_path,
                "aligned_to_start_without_hydrogen_conformers.mae")))[0]
    shift_x, shift_y, shift_z = grid[0]
    translate_structure(reference, shift_x, shift_y, shift_z)
    reference_center = list(get_centroid(reference))
    rotated = rotate_structure(reference.getXYZ(copy=False),
                               math.radians(x_rot), math.radians(y_rot),
                               math.radians(z_rot), reference_center)
    reference.setXYZ(rotated)

    rmsd_val = rmsd.calculate_in_place_rmsd(conformer,
                                            conformer.getAtomIndices(),
                                            reference,
                                            reference.getAtomIndices())
    if abs(rmsd_val) != 0:
        print("x_rot =", x_rot, "y_rot =", y_rot, "z_rot =", z_rot)
        print("RMSD =", rmsd_val, "but RMSD should equal 0")
    else:
        print("Search works properly", rmsd_val)
Beispiel #8
0
def run_group(grouped_files, raw_root, index, num_clusters):
    """
    selects a reduced set of pose codes per protein, target, start pair and pickles it

    Codes whose last '_' field starts with 'lig0', or whose last character is
    not a letter, are always kept. The remaining codes are clustered by
    centroid with KMeans and the first code met for each cluster label is
    kept. When there are no more such codes than num_clusters, every code in
    the graph file is kept.
    :param grouped_files: (list) list of protein, target, start groups
    :param raw_root: (string) directory containing the protein directories
    :param index: (int) group number
    :param num_clusters: (int) number of KMeans clusters
    :return: None; the code list is written to <pair_path>/<pair>_clustered.pkl
    """
    for protein, target, start in grouped_files[index]:
        pair = '{}-to-{}'.format(target, start)
        pair_path = os.path.join(raw_root, protein, pair)
        pose_path = os.path.join(pair_path, 'ligand_poses')
        graph_file = '{}/{}-to-{}_graph.pkl'.format(pair_path, target, start)
        # NOTE: pickle.load can execute code embedded in the file; only load
        # graph files produced by this pipeline.
        with open(graph_file, 'rb') as infile:
            graph_data = pickle.load(infile)

        centroids = []
        codes_to_include = []
        for pdb_code in tqdm(graph_data, desc="pdb_codes"):
            if (pdb_code.split('_')[-1][:4] != 'lig0'
                    and pdb_code[-1].isalpha()):
                pose_file = os.path.join(pose_path, '{}.mae'.format(pdb_code))
                s = list(structure.StructureReader(pose_file))[0]
                centroids.append((get_centroid(s), pdb_code))
            else:
                codes_to_include.append(pdb_code)

        if len(centroids) > num_clusters:
            X = np.array([centroid[:3] for centroid, _ in centroids],
                         dtype=float)
            kmeans = KMeans(n_clusters=num_clusters, random_state=0).fit(X)

            # keep the first code encountered for every cluster label
            condensed = {}
            for (_, code), label in zip(centroids, kmeans.labels_):
                if len(condensed) == num_clusters:
                    break
                condensed.setdefault(label, code)
            codes_to_include.extend(condensed.values())
        else:
            codes_to_include = list(graph_data)

        # context manager guarantees the pickle is flushed and the handle closed
        out_path = os.path.join(pair_path, '{}_clustered.pkl'.format(pair))
        with open(out_path, 'wb') as outfile:
            pickle.dump(codes_to_include, outfile)
def run_group(protein, target, start, raw_root, save_path, run_path, min_angle,
              max_angle, index, rmsd_cutoff, grid, num_jobs_submitted):
    """
    loads the structures of one protein, target, start pair and passes them to create_conformer_decoys
    :param protein: (string) protein directory name under raw_root
    :param target: (string) target code used in file and directory names
    :param start: (string) start code used in file and directory names
    :param raw_root: (string) directory containing the protein directories
    :param save_path: forwarded to create_conformer_decoys
    :param run_path: forwarded to create_conformer_decoys
    :param min_angle: forwarded to create_conformer_decoys
    :param max_angle: forwarded to create_conformer_decoys
    :param index: forwarded to create_conformer_decoys
    :param rmsd_cutoff: forwarded to create_conformer_decoys
    :param grid: forwarded to create_conformer_decoys
    :param num_jobs_submitted: forwarded to create_conformer_decoys
    :return:
    """
    def load_all(path):
        return list(structure.StructureReader(path))

    pair_path = os.path.join(raw_root, protein,
                             '{}-to-{}'.format(target, start))

    # for the ligands and the protein only the first structure of each file is used
    start_lig = load_all(
        os.path.join(pair_path, '{}_lig.mae'.format(start)))[0]
    target_lig = load_all(
        os.path.join(pair_path, 'ligand_poses',
                     '{}_lig0.mae'.format(target)))[0]
    start_lig_center = list(get_centroid(start_lig))
    prot = load_all(os.path.join(pair_path, '{}_prot.mae'.format(start)))[0]

    # every structure in the aligned conformer file is kept
    conformers = load_all(os.path.join(pair_path, "aligned_conformers.mae"))

    create_conformer_decoys(save_path, run_path, conformers, grid,
                            num_jobs_submitted, start_lig_center, target_lig,
                            prot, min_angle, max_angle, rmsd_cutoff, protein,
                            target, start, index)
def create_conformer_decoys(grid, target_lig, cutoff,
                            rotation_search_step_size, protein, target, start,
                            index, pair_path, test, x_rot, y_rot, z_rot):
    """
    exhaustively rotates every conformer at every grid location and records search statistics

    For each grid location the conformer file is re-read, each conformer is
    translated by the grid offset and then rotated over every x/y/z angle
    combination in range(-30, 30 + step, step) degrees about its translated
    centroid. One CSV row per grid location is written to
    <cwd>/decoy_timing_data/<protein>_<target>-to-<start>/<index>.csv.
    :param grid: (list) grid offsets, each indexable as [x, y, z]
    :param target_lig: structure every pose is compared with by in-place rmsd
    :param cutoff: (float) a pose counts as correct when its rmsd is below this value
    :param rotation_search_step_size: (int) angle step in degrees, used as a range() step
    :param protein: (string) recorded in the output and used in the output folder name
    :param target: (string) recorded in the output and used in the output folder name
    :param start: (string) recorded in the output and used in the output folder name
    :param index: used as the output CSV file name
    :param pair_path: (string) directory containing aligned_to_start_without_hydrogen_conformers.mae
    :param test: (bool) if true, return the conformer as soon as the rotation equals (x_rot, y_rot, z_rot)
    :param x_rot: (int) x angle in degrees looked for in test mode
    :param y_rot: (int) y angle in degrees looked for in test mode
    :param z_rot: (int) z angle in degrees looked for in test mode
    :return: the matching conformer in test mode, otherwise None
    """
    data_dict = {
        'protein': [],
        'target': [],
        'start': [],
        'num_conformers': [],
        'num_poses_searched': [],
        'num_correct_poses_found': [],
        'time_elapsed': [],
        'time_elapsed_per_conformer': [],
        'grid_loc_x': [],
        'grid_loc_y': [],
        'grid_loc_z': []
    }
    conformer_file = os.path.join(
        pair_path, "aligned_to_start_without_hydrogen_conformers.mae")

    for grid_loc in grid:
        # the original counter was never incremented, so the column was always 0;
        # it now counts the poses scored at this grid location, like the other
        # per-row statistics
        num_poses_searched = 0
        num_correct_found = 0
        angles = range(-30, 30 + rotation_search_step_size,
                       rotation_search_step_size)
        # re-read per grid location: the structures are modified in place below
        conformers = list(structure.StructureReader(conformer_file))
        decoy_start_time = time.time()

        for conformer in conformers:
            transform.translate_structure(conformer, grid_loc[0], grid_loc[1],
                                          grid_loc[2])
            conformer_center = list(get_centroid(conformer))
            # every rotation starts from this copy of the translated coordinates
            coords = conformer.getXYZ(copy=True)

            for x in angles:
                for y in angles:
                    for z in angles:
                        new_coords = rotate_structure(coords, math.radians(x),
                                                      math.radians(y),
                                                      math.radians(z),
                                                      conformer_center)
                        conformer.setXYZ(new_coords)

                        if test and x_rot == x and y_rot == y and z_rot == z:
                            return conformer

                        num_poses_searched += 1
                        rmsd_val = rmsd.calculate_in_place_rmsd(
                            conformer, conformer.getAtomIndices(), target_lig,
                            target_lig.getAtomIndices())
                        if rmsd_val < cutoff:
                            num_correct_found += 1

        elapsed = time.time() - decoy_start_time

        data_dict['protein'].append(protein)
        data_dict['target'].append(target)
        data_dict['start'].append(start)
        data_dict['num_conformers'].append(len(conformers))
        data_dict['num_poses_searched'].append(num_poses_searched)
        data_dict['num_correct_poses_found'].append(num_correct_found)
        data_dict['time_elapsed'].append(elapsed)
        data_dict['time_elapsed_per_conformer'].append(
            elapsed / len(conformers))
        data_dict['grid_loc_x'].append(grid_loc[0])
        data_dict['grid_loc_y'].append(grid_loc[1])
        data_dict['grid_loc_z'].append(grid_loc[2])

    df = pd.DataFrame.from_dict(data_dict)
    save_folder = os.path.join(os.getcwd(), 'decoy_timing_data',
                               '{}_{}-to-{}'.format(protein, target, start))
    # creates both directory levels and tolerates another job creating them first
    os.makedirs(save_folder, exist_ok=True)
    df.to_csv(os.path.join(save_folder, '{}.csv'.format(index)))
    return None
def time_conformer_decoys(pair_path, start_lig_center, target_lig, prot,
                          rotation_search_step_size):
    """
    times the translate, rotate, clash and rmsd calls on the conformers of one pair and prints the averages

    Three passes are made, each re-reading the conformer file:
    1. transform.translate_structure / transform.rotate_structure, one call each per conformer
    2. the bare-name translate_structure / rotate_structure, one call each per conformer
    3. steric_clash.clash_iterator, steric_clash.clash_volume and rmsd.calculate_in_place_rmsd
       inside a triple rotation loop; the averages are printed and the function returns as
       soon as 1000 samples have been collected. If the loops finish with fewer samples,
       nothing is printed for this pass.
    :param pair_path: (string) directory containing aligned_to_start_without_hydrogen_conformers.mae
    :param start_lig_center: (list) reference coordinates; each conformer is translated by
        (start_lig_center - its centroid)
    :param target_lig: structure passed to the rmsd calculation
    :param prot: structure passed to the clash calculations
    :param rotation_search_step_size: (int) angle step in degrees, used as a range() step
    :return:
    """
    # ---- pass 1: time the transform.* translate and rotate calls ----
    translate_times = []
    rotate_times = []
    conformer_file = os.path.join(
        pair_path, "aligned_to_start_without_hydrogen_conformers.mae")
    conformers = list(structure.StructureReader(conformer_file))

    for conformer in conformers:
        conformer_center = list(get_centroid(conformer))

        # translation
        # grid offset is fixed at zero here, so the shift is start_lig_center - centroid
        grid_loc = [0, 0, 0]
        start = time.time()
        transform.translate_structure(
            conformer, start_lig_center[0] - conformer_center[0] + grid_loc[0],
            start_lig_center[1] - conformer_center[1] + grid_loc[1],
            start_lig_center[2] - conformer_center[2] + grid_loc[2])
        end = time.time()
        translate_times.append(end - start)

        # rotation
        # conformer_center is the centroid computed before the translation above;
        # it is not recomputed in this pass
        start = time.time()
        transform.rotate_structure(
            conformer, math.radians(-30 - rotation_search_step_size), 0, 0,
            conformer_center)
        end = time.time()
        rotate_times.append(end - start)

    print("Average schrodinger translate time =",
          statistics.mean(translate_times))
    print("Average schrodinger rotate time =", statistics.mean(rotate_times))

    # ---- pass 2: same measurement with the bare-name translate/rotate functions ----
    # the conformers are re-read so this pass starts from unmodified structures
    translate_times = []
    rotate_times = []
    conformer_file = os.path.join(
        pair_path, "aligned_to_start_without_hydrogen_conformers.mae")
    conformers = list(structure.StructureReader(conformer_file))

    for conformer in conformers:
        conformer_center = list(get_centroid(conformer))

        # translation
        grid_loc = [0, 0, 0]
        start = time.time()
        translate_structure(
            conformer, start_lig_center[0] - conformer_center[0] + grid_loc[0],
            start_lig_center[1] - conformer_center[1] + grid_loc[1],
            start_lig_center[2] - conformer_center[2] + grid_loc[2])
        end = time.time()
        translate_times.append(end - start)

        # rotation
        # as in pass 1, the pre-translation centroid is used as the rotation centre
        start = time.time()
        rotate_structure(conformer,
                         math.radians(-30 - rotation_search_step_size), 0, 0,
                         conformer_center)
        end = time.time()
        rotate_times.append(end - start)

    print("Average custom translate time =", statistics.mean(translate_times))
    print("Average custom rotate time =", statistics.mean(rotate_times))

    # ---- pass 3: time clash_iterator, clash_volume and rmsd on rotated poses ----
    clash_iterator_times = []
    clash_volume_times = []
    rmsd_times = []
    rotation_search_step_size_rad = math.radians(rotation_search_step_size)

    conformer_file = os.path.join(
        pair_path, "aligned_to_start_without_hydrogen_conformers.mae")
    conformers = list(structure.StructureReader(conformer_file))
    for conformer in conformers:
        conformer_center = list(get_centroid(conformer))

        # translation
        grid_loc = [0, 0, 0]
        translate_structure(
            conformer, start_lig_center[0] - conformer_center[0] + grid_loc[0],
            start_lig_center[1] - conformer_center[1] + grid_loc[1],
            start_lig_center[2] - conformer_center[2] + grid_loc[2])
        # unlike passes 1 and 2, the centroid is recomputed after the translation
        conformer_center = list(get_centroid(conformer))

        # keep track of rotation angles
        # the pose is first rotated to one step below -30 degrees about x
        rotate_structure(conformer,
                         math.radians(-30 - rotation_search_step_size), 0, 0,
                         conformer_center)
        # NOTE(review): x_so_far is assigned and incremented but never read in this function
        x_so_far = -30 - rotation_search_step_size
        y_so_far = 0
        z_so_far = 0

        # the loop variable `_` is reused at all three nesting levels; its value is never read
        for _ in range(-30, 30, rotation_search_step_size):
            # x rotation
            # advances x by one step and applies (-30 - step - y_so_far) degrees about y
            rotate_structure(
                conformer, rotation_search_step_size_rad,
                math.radians(-30 - rotation_search_step_size - y_so_far), 0,
                conformer_center)
            # NOTE(review): the tracker grows by 1 per iteration while the rotation
            # applied is one full step -- confirm this is intended
            x_so_far += 1
            # algebraically this sets y_so_far to -30 - rotation_search_step_size
            y_so_far += -30 - rotation_search_step_size - y_so_far

            for _ in range(-30, 30, rotation_search_step_size):
                # y rotation
                # advances y by one step and applies (-30 - step - z_so_far) degrees about z
                rotate_structure(
                    conformer, 0, rotation_search_step_size_rad,
                    math.radians(-30 - rotation_search_step_size - z_so_far),
                    conformer_center)
                y_so_far += 1
                # algebraically this sets z_so_far to -30 - rotation_search_step_size
                z_so_far += -30 - rotation_search_step_size - z_so_far

                for _ in range(-30, 30, rotation_search_step_size):
                    # z rotation
                    rotate_structure(conformer, 0, 0,
                                     rotation_search_step_size_rad,
                                     conformer_center)
                    z_so_far += 1

                    # get clash_iterator
                    # the timed work includes materialising the iterator and taking the
                    # max of element [2] of each item; max() raises ValueError on an
                    # empty list, so at least one item must be yielded
                    start = time.time()
                    max([
                        x[2] for x in list(
                            steric_clash.clash_iterator(prot,
                                                        struc2=conformer))
                    ])
                    end = time.time()
                    clash_iterator_times.append(end - start)

                    # get clash_volume
                    start = time.time()
                    steric_clash.clash_volume(prot, struc2=conformer)
                    end = time.time()
                    clash_volume_times.append(end - start)

                    # get rmsd
                    start = time.time()
                    rmsd.calculate_in_place_rmsd(conformer,
                                                 conformer.getAtomIndices(),
                                                 target_lig,
                                                 target_lig.getAtomIndices())
                    end = time.time()
                    rmsd_times.append(end - start)

                    # stop the whole benchmark once 1000 poses have been timed
                    if len(clash_iterator_times) == 1000:
                        print("Average clash iterator time =",
                              statistics.mean(clash_iterator_times))
                        print("Average clash volume time =",
                              statistics.mean(clash_volume_times))
                        print("Average rmsd time =",
                              statistics.mean(rmsd_times))
                        return
Beispiel #12
0
def main():
    """
    command line entry point for creating and checking glide grid files

    Tasks:
    * run    -- write a grid .in file per pair, write one grid<i>_in.sh script
                per group and submit it with sbatch
    * check  -- report the pairs in docked_prot_file whose grid .zip is missing
    * update -- write the lines of docked_prot_file whose grid .zip exists to
                new_prot_file
    Any other task value does nothing.
    :return:
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('task',
                        type=str,
                        help='either run, check, or update')
    parser.add_argument('docked_prot_file',
                        type=str,
                        help='file listing proteins to process')
    parser.add_argument(
        'run_path',
        type=str,
        help='directory where script and output files will be written')
    parser.add_argument('raw_root',
                        type=str,
                        help='directory where raw data will be placed')
    parser.add_argument('--new_prot_file',
                        type=str,
                        default=os.path.join(os.getcwd(), 'index.txt'),
                        help='for update task, name of new prot file')
    args = parser.parse_args()

    if args.task == 'run':
        # os.chdir below changes the working directory between groups, so
        # relative paths must be resolved once, up front
        run_path = os.path.abspath(args.run_path)
        raw_root = os.path.abspath(args.raw_root)

        # NOTE(review): this task runs on a hard-coded triple and does not read
        # docked_prot_file; the original carried a commented-out
        # get_prots(args.docked_prot_file) call here.
        process = [('P04746', '3old', '1xd0')]
        grouped_files = group_files(N, process)

        for i, group in enumerate(grouped_files):
            script_path = os.path.join(run_path, 'grid{}_in.sh'.format(i))
            with open(script_path, 'w') as f:
                f.write('#!/bin/bash\n')
                for protein, target, start in group:
                    pair = '{}-to-{}'.format(target, start)
                    pair_path = os.path.join(raw_root, protein, pair)
                    pose_path = os.path.join(pair_path, 'ligand_poses')

                    # the grid is centred on the centroid of the target's lig0 pose
                    lig0_file = os.path.join(pose_path,
                                             '{}_lig0.mae'.format(target))
                    c = get_centroid(
                        list(structure.StructureReader(lig0_file))[0])
                    x, y, z = c[:3]

                    in_file = os.path.join(pair_path, '{}.in'.format(pair))
                    with open(in_file, 'w') as f_in:
                        f_in.write('GRID_CENTER {},{},{}\n'.format(x, y, z))
                        f_in.write('GRIDFILE {}.zip\n'.format(pair))
                        f_in.write('INNERBOX 15,15,15\n')
                        f_in.write('OUTERBOX 30,30,30\n')
                        f_in.write('RECEP_FILE {}\n'.format(
                            os.path.join(pair_path,
                                         '{}_prot.mae'.format(start))))

                    f.write('cd {}\n'.format(pair_path))
                    f.write('$SCHRODINGER/glide -WAIT {}.in\n'.format(pair))
                    # each command needs its own line; without the newline the
                    # two rm commands were fused into one
                    f.write('rm {}.in\n'.format(pair))
                    f.write('rm {}.log\n'.format(pair))

            os.chdir(run_path)
            os.system(
                'sbatch -p owners -t 00:30:00 -o grid{}.out grid{}_in.sh'.
                format(i, i))

    if args.task == 'check':
        process = []
        num_pairs = 0
        with open(args.docked_prot_file) as fp:
            for line in tqdm(
                    fp, desc='going through protein, target, start groups'):
                if line[0] == '#':
                    continue
                protein, target, start = line.strip().split()
                num_pairs += 1
                pair_path = os.path.join(args.raw_root, protein,
                                         '{}-to-{}'.format(target, start))

                # check basic files
                if not os.path.exists('{}/{}-to-{}.zip'.format(
                        pair_path, target, start)):
                    process.append((protein, target, start))

        print('Missing', len(process), '/', num_pairs)
        print(process)

    if args.task == 'update':
        text = []
        with open(args.docked_prot_file) as fp:
            for line in tqdm(fp, desc='files'):
                if line[0] == '#':
                    continue
                protein, target, start = line.strip().split()
                pair_path = os.path.join(args.raw_root, protein,
                                         '{}-to-{}'.format(target, start))
                if os.path.exists('{}/{}-to-{}.zip'.format(
                        pair_path, target, start)):
                    text.append(line)

        with open(args.new_prot_file, "w") as out_file:
            out_file.writelines(text)
def run(grouped_files, run_path, raw_root, decoy_type):
    """
    writes grid input files and a submission script, then submits the script with sbatch

    Only the first protein, target, start entry of the first group is
    handled: both loops end with a break. For that entry a
    target-to-start grid and a target-to-target grid are each set up unless
    the corresponding .zip already exists.
    :param grouped_files: (list) list of protein, target, start groups
    :param run_path: (string) directory where script and output files will be written
    :param raw_root: (string) directory where raw data will be placed
    :param decoy_type: (string) name of the sub-directory of the pair directory that holds <target>_lig0.mae
    :return:
    """
    def add_grid_job(script, pair_path, pose_path, target, grid_name,
                     receptor_code):
        # writes <grid_name>.in and appends the matching commands to the script
        with open(os.path.join(pair_path, '{}.in'.format(grid_name)),
                  'w') as f_in:
            lig0_file = os.path.join(pose_path, '{}_lig0.mae'.format(target))
            c = get_centroid(list(structure.StructureReader(lig0_file))[0])
            x, y, z = c[:3]
            receptor = os.path.join(pair_path,
                                    '{}_prot.mae'.format(receptor_code))

            f_in.write('GRID_CENTER {},{},{}\n'.format(x, y, z))
            f_in.write('GRIDFILE {}.zip\n'.format(grid_name))
            f_in.write('INNERBOX 15,15,15\n')
            f_in.write('OUTERBOX 30,30,30\n')
            f_in.write('RECEP_FILE {}\n'.format(receptor))

            # create grid commands
            script.write('#!/bin/bash\n')
            script.write('cd {}\n'.format(pair_path))
            script.write('$SCHRODINGER/glide -WAIT {}.in\n'.format(grid_name))
            script.write('rm {}/{}.in\n'.format(pair_path, grid_name))
            script.write('rm {}/{}.log\n'.format(pair_path, grid_name))

    for i, group in enumerate(grouped_files):
        script_path = os.path.join(run_path, 'grid{}_in.sh'.format(i))
        with open(script_path, 'w') as f:
            print(script_path)
            for protein, target, start in group:
                pair = '{}-to-{}'.format(target, start)
                target_pair = '{}-to-{}'.format(target, target)
                pair_path = os.path.join(raw_root, protein, pair)
                pose_path = os.path.join(pair_path, decoy_type)
                print(pair_path)

                # target-to-start grid uses the start protein as receptor
                pair_zip = os.path.join(pair_path, '{}.zip'.format(pair))
                if not os.path.exists(pair_zip):
                    add_grid_job(f, pair_path, pose_path, target, pair, start)

                # target-to-target grid uses the target protein as receptor
                target_zip = os.path.join(pair_path,
                                          '{}.zip'.format(target_pair))
                if not os.path.exists(target_zip):
                    print('hi')
                    add_grid_job(f, pair_path, pose_path, target, target_pair,
                                 target)
                break

        os.chdir(run_path)
        os.system(
            'sbatch -p rondror -t 02:00:00 -o grid{}.out grid{}_in.sh'.format(
                i, i))
        break
def create_conformer_decoys(save_path, run_path, conformers, grid,
                            num_jobs_submitted, start_lig_center, target_lig,
                            prot, min_angle, max_angle, rmsd_cutoff, protein,
                            target, start, index):
    """
    Randomly place conformers around the start ligand until one pose falls
    within ``rmsd_cutoff`` of ``target_lig`` or the search space is exhausted.

    Every iteration picks a random conformer and a random grid offset, moves
    the conformer so that its centroid sits at ``start_lig_center`` plus the
    offset, and rotates it about its centroid. While more than one grid point
    remains the rotation angles are drawn uniformly from
    ``[min_angle, max_angle]``; once a single grid point is left they are
    drawn from a table of whole-degree rotations instead. Conformers are
    transformed in place, so rotations accumulate across iterations.

    After five consecutive poses that clash with the protein, the most-missed
    grid point is dropped; once only one grid point is left, conformers and
    tabulated rotations are dropped instead. The search gives up when a single
    conformer and a single rotation remain.

    :param save_path: (string) directory of the result file
        ``<protein>_<target>_<start>.txt``; the file is only written if it
        does not exist yet
    :param run_path: (string) directory of the log file
        ``<protein>_<target>_<start>_<index>.txt``
    :param conformers: (iterable) conformer structures to sample from
    :param grid: (list) ``[[dx, dy, dz], miss_count]`` entries; modified in
        place (counters updated, entries removed)
    :param num_jobs_submitted: (int) the search stops as soon as the number of
        queued jobs reported by ``get_jobs_in_queue`` differs from this value
        (checked every 1000 poses)
    :param start_lig_center: (sequence) x, y, z of the start ligand centroid
    :param target_lig: reference structure for the RMSD test
    :param prot: protein structure used for the steric clash test
    :param min_angle: (float) minimum rotation angle, in radians
    :param max_angle: (float) maximum rotation angle, in radians
    :param rmsd_cutoff: (float) a pose counts as correct below this RMSD
    :param protein: (string) protein name
    :param target: (string) target ligand name
    :param start: (string) start ligand name
    :param index: used only to name the log file
    :return:
    """
    conformer_ls = [[c, 0] for c in conformers]

    # Whole-degree rotation table; convert each angle once instead of once per
    # nesting level. Every entry is [[x, y, z] in radians, miss_count].
    angles = [
        math.radians(deg) for deg in range(int(math.degrees(min_angle)),
                                           int(math.degrees(max_angle)) + 1)
    ]
    rot_ls = [[[rot_x, rot_y, rot_z], 0] for rot_x in angles
              for rot_y in angles for rot_z in angles]

    output_file = os.path.join(
        run_path, '{}_{}_{}_{}.txt'.format(protein, target, start, index))
    save_file = os.path.join(save_path,
                             '{}_{}_{}.txt'.format(protein, target, start))
    num_iter_without_pose = 0
    num_valid_poses = 0
    num_total_poses = 0

    def write_summary(path, mode, search_failed):
        # Dump the current counters; shared by the log and the result file.
        with open(path, mode) as out:
            out.write("Num poses searched = {}\n".format(num_total_poses))
            out.write("Num acceptable clash poses searched = {}\n".format(
                num_valid_poses))
            if search_failed:
                out.write("No correct poses found\n")

    while True:
        num_iter_without_pose += 1
        num_total_poses += 1
        if num_total_poses % 1000 == 0:
            num_jobs_in_queue = get_jobs_in_queue('{}{}{}'.format(
                protein[0], target[0], start[0]))
            with open(output_file, "a") as log:
                log.write(
                    "num_total_poses: {}, len(grid): {}, len(conformer_ls): {}, len(rot_ls): {}, num_jobs_in_queue: "
                    "{}\n".format(num_total_poses, len(grid),
                                  len(conformer_ls), len(rot_ls),
                                  num_jobs_in_queue))
            # presumably a sibling job finished (e.g. found the pose), so
            # this one stops too - confirm against the job submitter
            if num_jobs_in_queue != num_jobs_submitted:
                break
        conformer_index = random.randint(0, len(conformer_ls) - 1)
        conformer = conformer_ls[conformer_index][0]
        conformer_center = list(get_centroid(conformer))

        # translation
        # (kept in its own name so the ``index`` argument is not clobbered)
        grid_index = random.randint(0, len(grid) - 1)
        grid_loc = grid[grid_index][0]
        transform.translate_structure(
            conformer, start_lig_center[0] - conformer_center[0] + grid_loc[0],
            start_lig_center[1] - conformer_center[1] + grid_loc[1],
            start_lig_center[2] - conformer_center[2] + grid_loc[2])
        conformer_center = list(get_centroid(conformer))

        # rotation
        if len(grid) > 1:
            x_angle = np.random.uniform(min_angle, max_angle)
            y_angle = np.random.uniform(min_angle, max_angle)
            z_angle = np.random.uniform(min_angle, max_angle)
        else:
            rot_index = random.randint(0, len(rot_ls) - 1)
            x_angle, y_angle, z_angle = rot_ls[rot_index][0]
        transform.rotate_structure(conformer, x_angle, y_angle, z_angle,
                                   conformer_center)

        if steric_clash.clash_volume(prot, struc2=conformer) < 200:
            num_valid_poses += 1
            if rmsd.calculate_in_place_rmsd(
                    conformer, conformer.getAtomIndices(), target_lig,
                    target_lig.getAtomIndices()) < rmsd_cutoff:
                # correct pose found: record the counters and stop
                write_summary(output_file, "a", False)
                if not os.path.exists(save_file):
                    write_summary(save_file, 'w', False)
                break
            grid[grid_index][1] = 0
            num_iter_without_pose = 0
        elif num_iter_without_pose == 5 and len(grid) > 1:
            # five clashing poses in a row: drop the most-missed grid point
            grid.remove(max(grid, key=lambda entry: entry[1]))
            num_iter_without_pose = 0
        elif num_iter_without_pose == 5 and len(grid) == 1:
            if len(conformer_ls) == 1 and len(rot_ls) == 1:
                # nothing left to prune: give up
                write_summary(output_file, "a", True)
                if not os.path.exists(save_file):
                    write_summary(save_file, 'w', True)
                break
            elif len(conformer_ls) > 1 and (len(rot_ls) == 1 or
                                            (len(conformer_ls) + len(rot_ls)) %
                                            2 == 0):
                # prune a conformer when rotations are exhausted or the
                # combined size is even; otherwise prune a rotation
                conformer_ls.remove(
                    max(conformer_ls, key=lambda entry: entry[1]))
            else:
                rot_ls.remove(max(rot_ls, key=lambda entry: entry[1]))
            num_iter_without_pose = 0
        else:
            # clashing pose: charge a miss to everything that produced it
            grid[grid_index][1] += 1
            conformer_ls[conformer_index][1] += 1
            if len(grid) == 1:
                rot_ls[rot_index][1] += 1
def main():
    """
    Command-line entry point for creating and checking ligand decoys.

    The positional ``task`` argument selects exactly one action:

    * ``all``: submit one ``group`` job per group of protein/target/start
      triples
    * ``group``: create cartesian decoys for the glide poses of the first
      triple of group ``--index``
    * ``check``: report triples for which an expected decoy file is missing
    * ``all_dist_check``: submit ``group_dist_check`` jobs for a hard-coded
      list of groups
    * ``group_dist_check``: pickle the mean pose-to-decoy centroid distance
      of every triple in group ``--index`` into ``--dist_dir``
    * ``check_dist_check``: read those pickles and list groups with a mean
      distance outside [-1, 2]
    * ``all_name_check``: submit one ``group_name_check`` job per group
    * ``group_name_check``: print triples whose pose or decoy files do not
      contain their own file name
    * ``check_name_check``: read the per-group pickles in ``--name_dir`` and
      print all unfinished triples
    * ``MAPK14``: create decoys for the two hard-coded MAPK14 ligands
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('task',
                        type=str,
                        help='either all, group, check, or MAPK14')
    parser.add_argument('docked_prot_file',
                        type=str,
                        help='file listing proteins to process')
    parser.add_argument(
        'run_path',
        type=str,
        help='directory where script and output files will be written')
    parser.add_argument('raw_root',
                        type=str,
                        help='directory where raw data will be placed')
    parser.add_argument('--index',
                        type=int,
                        default=-1,
                        help='for group task, group number')
    parser.add_argument(
        '--dist_dir',
        type=str,
        default=os.path.join(os.getcwd(), 'dists'),
        help=
        'for all_dist_check and group_dist_check task, directiory to place distances'
    )
    parser.add_argument(
        '--name_dir',
        type=str,
        default=os.path.join(os.getcwd(), 'names'),
        help=
        'for all_name_check and group_name_check task, directiory to place unfinished protein, '
        'target, start groups')
    args = parser.parse_args()

    def names_itself(path):
        # True when some line of the file is exactly the file's own name.
        file_name = path.split('/')[-1]
        with open(path, "r") as f:
            return any(line.strip() == file_name for line in f)

    if args.task == 'all':
        process = get_prots(args.docked_prot_file)
        grouped_files = group_files(N, process)

        if not os.path.exists(args.run_path):
            os.mkdir(args.run_path)

        cmd = 'sbatch -p owners -t 1:00:00 -o {} --wrap="$SCHRODINGER/run python3 decoy_creator.py group {} {} {} ' \
              '--index {}"'
        for i in range(len(grouped_files)):
            os.system(
                cmd.format(
                    os.path.join(args.run_path, 'decoy{}.out'.format(i)),
                    args.docked_prot_file, args.run_path, args.raw_root, i))

    elif args.task == 'group':
        process = get_prots(args.docked_prot_file)
        grouped_files = group_files(N, process)

        for protein, target, start in grouped_files[args.index]:
            protein_path = os.path.join(args.raw_root, protein)
            pair_path = os.path.join(protein_path,
                                     '{}-to-{}'.format(target, start))
            pose_path = os.path.join(pair_path, 'cartesian_ligand_poses')
            pv_file = os.path.join(
                pair_path, '{}-to-{}_glide_pv.maegz'.format(target, start))
            num_poses = len(list(structure.StructureReader(pv_file)))
            print(num_poses)

            for i in range(num_poses):
                if i == MAX_POSES:
                    break
                lig_file = os.path.join(pose_path,
                                        '{}_lig{}.mae'.format(target, i))
                create_cartesian_decoys(lig_file)
            # NOTE(review): only the first triple of the group is processed;
            # this looks like a debugging leftover - confirm before removing
            break

    elif args.task == 'check':
        process = []
        num_pairs = 0
        with open(args.docked_prot_file) as fp:
            for line in tqdm(
                    fp, desc='going through protein, target, start groups'):
                if line[0] == '#':
                    continue
                protein, target, start = line.strip().split()
                num_pairs += 1
                protein_path = os.path.join(args.raw_root, protein)
                pair_path = os.path.join(protein_path,
                                         '{}-to-{}'.format(target, start))
                pose_path = os.path.join(pair_path, 'ligand_poses')

                # only the decoys of pose 0 are inspected
                num_poses = 0
                for i in range(MAX_DECOYS):
                    decoy_file = os.path.join(
                        pose_path, '{}_lig{}.mae'.format(
                            target,
                            str(num_poses) + chr(ord('a') + i)))
                    # The path itself is always a truthy string, so the file
                    # system has to be asked whether the decoy really exists.
                    if not os.path.exists(decoy_file):
                        process.append((protein, target, start))
                        print(decoy_file)
                        break

        print('Missing', len(process), '/', num_pairs)
        print(process)

    elif args.task == 'all_dist_check':
        # Hard-coded subset of group indices to re-run; the complete set
        # would be range(len(group_files(N, get_prots(...)))).
        groups = [
            31, 32, 151, 176, 186, 187, 189, 194, 195, 198, 225, 226, 322, 332,
            333, 341, 343, 452, 453, 460, 487, 495
        ]

        if not os.path.exists(args.run_path):
            os.mkdir(args.run_path)

        cmd = 'sbatch -p owners -t 0:20:00 -o {} --wrap="$SCHRODINGER/run python3 decoy_creator.py group_dist_check {} {} {} ' \
              '--index {}"'
        for i in groups:
            os.system(
                cmd.format(
                    os.path.join(args.run_path, 'decoy{}.out'.format(i)),
                    args.docked_prot_file, args.run_path, args.raw_root, i))

    elif args.task == 'group_dist_check':
        if not os.path.exists(args.dist_dir):
            os.mkdir(args.dist_dir)

        process = get_prots(args.docked_prot_file)
        grouped_files = group_files(N, process)
        save = []

        for protein, target, start in grouped_files[args.index]:
            protein_path = os.path.join(args.raw_root, protein)
            pair_path = os.path.join(protein_path,
                                     '{}-to-{}'.format(target, start))
            pose_path = os.path.join(pair_path, 'ligand_poses')
            pv_file = os.path.join(pair_path,
                                   '{}-to-{}_pv.maegz'.format(target, start))
            num_poses = len(list(structure.StructureReader(pv_file)))
            means = []

            for i in range(num_poses):
                if i == MAX_POSES:
                    break
                lig_file = os.path.join(pose_path,
                                        '{}_lig{}.mae'.format(target, i))
                s = list(structure.StructureReader(lig_file))[0]
                c = get_centroid(s)
                dists = []

                for j in range(MAX_DECOYS):
                    decoy_file = lig_file[:-4] + chr(ord('a') + j) + '.mae'
                    decoy = list(structure.StructureReader(decoy_file))[0]
                    dists.append(
                        transform.get_vector_magnitude(c -
                                                       get_centroid(decoy)))

                means.append(statistics.mean(dists))

            save.append(statistics.mean(means))

        # close the handle explicitly so the pickle is flushed to disk
        with open(os.path.join(args.dist_dir, '{}.pkl'.format(args.index)),
                  'wb') as outfile:
            pickle.dump(save, outfile)
        print(save)

    elif args.task == 'check_dist_check':
        process = get_prots(args.docked_prot_file)
        grouped_files = group_files(N, process)

        if len(os.listdir(args.dist_dir)) != len(grouped_files):
            print('Not all files created')
        else:
            print('All files created')

        errors = []
        for i in range(len(grouped_files)):
            # these pickles are expected to come from group_dist_check;
            # never point --dist_dir at untrusted files
            with open(os.path.join(args.dist_dir, '{}.pkl'.format(i)),
                      'rb') as infile:
                vals = pickle.load(infile)

            for j in vals:
                if j > 2 or j < -1:
                    print(vals)
                    errors.append(i)
                    break

        print('Potential errors', len(errors), '/', len(grouped_files))
        print(errors)

    elif args.task == 'all_name_check':
        if not os.path.exists(args.name_dir):
            os.mkdir(args.name_dir)

        process = get_prots(args.docked_prot_file)
        grouped_files = group_files(N, process)

        if not os.path.exists(args.run_path):
            os.mkdir(args.run_path)

        cmd = 'sbatch -p owners -t 0:20:00 -o {} --wrap="$SCHRODINGER/run python3 decoy_creator.py group_name_check {} {} {} ' \
              '--index {}"'
        for i in range(len(grouped_files)):
            os.system(
                cmd.format(os.path.join(args.run_path, 'name{}.out'.format(i)),
                           args.docked_prot_file, args.run_path, args.raw_root,
                           i))

    elif args.task == 'group_name_check':
        if not os.path.exists(args.name_dir):
            os.mkdir(args.name_dir)

        process = get_prots(args.docked_prot_file)
        grouped_files = group_files(N, process)
        unfinished = []

        for protein, target, start in grouped_files[args.index]:
            protein_path = os.path.join(args.raw_root, protein)
            pair_path = os.path.join(protein_path,
                                     '{}-to-{}'.format(target, start))
            pose_path = os.path.join(pair_path, 'ligand_poses')
            pv_file = os.path.join(
                pair_path, '{}-to-{}_glide_pv.maegz'.format(target, start))
            num_poses = len(list(structure.StructureReader(pv_file)))

            for i in range(num_poses):
                if i == MAX_POSES:
                    break
                lig_file = os.path.join(pose_path,
                                        '{}_lig{}.mae'.format(target, i))
                # the pose file first, then each of its decoys in order;
                # decoys are only opened when the pose file itself is fine
                bad_file = None
                if not names_itself(lig_file):
                    bad_file = lig_file
                else:
                    for j in range(MAX_DECOYS):
                        decoy_file = lig_file[:-4] + chr(ord('a') + j) + '.mae'
                        if not names_itself(decoy_file):
                            bad_file = decoy_file
                            break
                if bad_file is not None:
                    print(bad_file)
                    unfinished.append((protein, target, start))
                    break
            # NOTE(review): only the first triple of the group is checked;
            # this looks like a debugging leftover - confirm before removing
            break

        # NOTE(review): the result is only printed, not pickled to
        # '<name_dir>/<index>.pkl', although check_name_check reads such
        # pickles - confirm whether the dump should be restored
        print(unfinished)

    elif args.task == 'check_name_check':
        process = get_prots(args.docked_prot_file)
        grouped_files = group_files(N, process)

        if len(os.listdir(args.name_dir)) != len(grouped_files):
            print('Not all files created')
        else:
            print('All files created')

        errors = []
        for i in range(len(grouped_files)):
            # pickles are expected to be produced by this script itself;
            # never point --name_dir at untrusted files
            with open(os.path.join(args.name_dir, '{}.pkl'.format(i)),
                      'rb') as infile:
                unfinished = pickle.load(infile)
            errors.extend(unfinished)

        print('Errors', len(errors), '/', len(process))
        print(errors)

    elif args.task == 'MAPK14':
        protein = 'MAPK14'
        ligs = ['3D83', '4F9Y']
        for target in ligs:
            for start in ligs:
                if target != start:
                    pv_file = os.path.join(
                        args.raw_root, '{}/{}-to-{}/{}-to-{}_pv.maegz'.format(
                            protein, target, start, target, start))
                    num_poses = len(list(structure.StructureReader(pv_file)))
                    for i in range(num_poses):
                        # at most 101 poses (indices 0..100) per pair
                        if i == 101:
                            break
                        lig_file = '{}/{}/{}-to-{}/{}_lig{}.mae'.format(
                            args.raw_root, protein, target, start, target, i)
                        create_decoys(lig_file)
def run_group(grouped_files, raw_root, data_root, index, max_poses, decoy_type,
              max_decoys, mean_translation, stdev_translation, min_angle,
              max_angle, num_conformers, grid_size):
    """
    creates decoys for each protein, target, start group

    For every triple the pair directory is populated with the protein and
    ligand files, decoys are generated according to ``decoy_type``, all
    ``*mae`` files of the pose directory are merged into
    ``<pair>_<decoy_type>_merge_pv.mae`` and the MCSS file is computed if it
    is missing.

    :param grouped_files: (list) list of protein, target, start groups
    :param raw_root: (string) directory where raw data will be placed
    :param data_root: (string) pdbbind directory where raw data will be obtained
    :param index: (int) group number
    :param max_poses: (int) maximum number of glide poses considered
    :param decoy_type: (string) name of the pose directory and decoy strategy:
        ligand_poses, cartesian_poses or conformer_poses
    :param max_decoys: (int) maximum number of decoys created per glide pose
    :param mean_translation: (float) mean distance decoys are translated
    :param stdev_translation: (float) stdev of distance decoys are translated
    :param min_angle: (float) minimum angle decoys are rotated
    :param max_angle: (float) maximum angle decoys are rotated
    :param num_conformers: (int) passed to gen_ligand_conformers when the
        conformer file has to be generated
    :param grid_size: (int) passed to create_conformer_decoys
    :return:
    """
    for protein, target, start in grouped_files[index]:
        pair = '{}-to-{}'.format(target, start)
        protein_path = os.path.join(raw_root, protein)
        pair_path = os.path.join(protein_path, pair)
        pose_path = os.path.join(pair_path, decoy_type)
        dock_root = os.path.join(data_root,
                                 '{}/docking/sp_es4/{}'.format(protein, pair))
        struct_root = os.path.join(data_root,
                                   '{}/structures/aligned'.format(protein))

        # create folders
        # One call builds raw_root/protein/pair/decoy_type and tolerates
        # another job creating the same directories at the same time.
        os.makedirs(pose_path, exist_ok=True)

        # add basic files
        basic_files = (
            ('{}/{}_prot.mae'.format(struct_root, start),
             '{}/{}_prot.mae'.format(pair_path, start)),
            ('{}/{}_prot.mae'.format(struct_root, target),
             '{}/{}_prot.mae'.format(pair_path, target)),
            ('{}/{}_lig.mae'.format(struct_root, start),
             '{}/{}_lig.mae'.format(pair_path, start)),
            ('{}/{}_lig.mae'.format(struct_root, target),
             '{}/{}_lig0.mae'.format(pose_path, target)),
        )
        for src, dst in basic_files:
            if not os.path.exists(dst):
                os.system('cp {} {}'.format(src, dst))
        modify_file('{}/{}_lig0.mae'.format(pose_path, target), '_pro_ligand')

        # add combine glide poses
        pv_file = '{}/{}_glide_pv.maegz'.format(pair_path, pair)
        if not os.path.exists(pv_file):
            os.system('cp {}/{}_pv.maegz {}'.format(dock_root, pair, pv_file))

        if decoy_type == "ligand_poses" or decoy_type == "cartesian_poses":
            # extract glide poses and create decoys
            # Parse the pose file once instead of once per extracted pose.
            glide_poses = list(structure.StructureReader(pv_file))
            for i in range(len(glide_poses)):
                if i == max_poses:
                    break
                lig_file = os.path.join(pose_path,
                                        '{}_lig{}.mae'.format(target, i))
                # pose 0 is the ligand file copied above, not a glide pose
                if i != 0:
                    with structure.StructureWriter(lig_file) as all_file:
                        all_file.append(glide_poses[i])
                if decoy_type == 'cartesian_poses':
                    create_cartesian_decoys(lig_file)
                elif decoy_type == 'ligand_poses':
                    create_decoys(lig_file, max_decoys, mean_translation,
                                  stdev_translation, min_angle, max_angle)

        elif decoy_type == "conformer_poses":
            start_lig_file = os.path.join(pair_path,
                                          '{}_lig.mae'.format(start))
            start_lig = list(structure.StructureReader(start_lig_file))[0]
            # NOTE(review): read from 'ligand_poses', not from pose_path, so
            # a ligand_poses run presumably has to precede this one - confirm
            target_lig_file = os.path.join(pair_path, 'ligand_poses',
                                           '{}_lig0.mae'.format(target))
            start_lig_center = list(get_centroid(start_lig))
            prot_file = os.path.join(pair_path, '{}_prot.mae'.format(start))
            prot = list(structure.StructureReader(prot_file))[0]

            conformer_file = os.path.join(pair_path,
                                          "{}_lig0-out.maegz".format(target))
            conformer_log = os.path.join(pair_path,
                                         '{}_lig0.log'.format(target))
            aligned_file = os.path.join(pair_path, "aligned_conformers.mae")
            if not os.path.exists(aligned_file):
                if not os.path.exists(conformer_file):
                    gen_ligand_conformers(target_lig_file, pair_path,
                                          num_conformers)
                get_aligned_conformers(conformer_file, target_lig_file,
                                       aligned_file)

            conformers = list(structure.StructureReader(aligned_file))
            create_conformer_decoys(conformers, grid_size, start_lig_center,
                                    prot, pose_path, target, max_poses,
                                    min_angle, max_angle)
            # clean up the intermediate conformer files
            for leftover in (conformer_log, conformer_file):
                if os.path.exists(leftover):
                    os.remove(leftover)

        # combine ligands
        merge_file = '{}/{}_{}_merge_pv.mae'.format(pair_path, pair,
                                                    decoy_type)
        if os.path.exists(merge_file):
            os.remove(merge_file)
        with structure.StructureWriter(merge_file) as all_file:
            for file in os.listdir(pose_path):
                if file[-3:] == 'mae':
                    pv = list(
                        structure.StructureReader(os.path.join(
                            pose_path, file)))
                    all_file.append(pv[0])

        # compute mcss
        if not os.path.exists(
                os.path.join(pair_path, '{}_mcss.csv'.format(pair))):
            compute_protein_mcss([target, start], pair_path)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('task', type=str, help='either align or search')
    parser.add_argument('docked_prot_file',
                        type=str,
                        help='file listing proteins to process')
    parser.add_argument(
        'run_path',
        type=str,
        help='directory where script and output files will be written')
    parser.add_argument('raw_root',
                        type=str,
                        help='directory where raw data will be placed')
    parser.add_argument('--protein', type=str, default='', help='protein name')
    parser.add_argument('--target',
                        type=str,
                        default='',
                        help='target ligand name')
    parser.add_argument('--start',
                        type=str,
                        default='',
                        help='start ligand name')
    parser.add_argument('--align_n',
                        type=int,
                        default=10,
                        help='number of alignments processed in each job')
    parser.add_argument('--rotation_search_step_size',
                        type=int,
                        default=1,
                        help='step size between each angle '
                        'checked, in degrees')
    parser.add_argument('--index',
                        type=int,
                        default=-1,
                        help='grid point group index')
    parser.add_argument(
        '--rmsd_cutoff',
        type=int,
        default=2,
        help='rmsd accuracy cutoff between predicted ligand pose '
        'and true ligand pose')
    parser.add_argument('--num_conformers',
                        type=int,
                        default=300,
                        help='maximum number of conformers considered')
    parser.add_argument('--grid_size',
                        type=int,
                        default=6,
                        help='grid size in positive and negative x, y, z '
                        'directions')
    parser.add_argument('--grid_n',
                        type=int,
                        default=30,
                        help='number of grid_points processed in each job')
    parser.add_argument('--time', dest='get_time', action='store_true')
    parser.add_argument('--no_time', dest='get_time', action='store_false')
    parser.set_defaults(get_time=False)
    parser.add_argument('--remove_prot_h',
                        dest='no_prot_h',
                        action='store_true')
    parser.add_argument('--keep_prot_h',
                        dest='no_prot_h',
                        action='store_false')
    parser.set_defaults(no_prot_h=False)
    parser.add_argument('--prot_pocket_only',
                        dest='pocket_only',
                        action='store_true')
    parser.add_argument('--all_prot', dest='pocket_only', action='store_false')
    parser.set_defaults(pocket_only=False)

    args = parser.parse_args()

    random.seed(0)

    if not os.path.exists(args.run_path):
        os.mkdir(args.run_path)

    pair = '{}-to-{}'.format(args.target, args.start)
    protein_path = os.path.join(args.raw_root, args.protein)
    pair_path = os.path.join(protein_path, pair)

    if args.task == 'conformer_all':
        process = get_prots(args.docked_prot_file)
        random.shuffle(process)
        run_conformer_all(process, args.raw_root, args.run_path,
                          args.docked_prot_file)

    elif args.task == 'conformer_group':
        target_lig_file = os.path.join(pair_path, 'ligand_poses',
                                       '{}_lig0.mae'.format(args.target))
        gen_ligand_conformers(target_lig_file, pair_path, args.num_conformers)
        if os.path.exists(
                os.path.join(pair_path, '{}_lig0.log'.format(args.target))):
            os.remove(
                os.path.join(pair_path, '{}_lig0.log'.format(args.target)))

    if args.task == 'conformer_check':
        process = get_prots(args.docked_prot_file)
        random.shuffle(process)
        run_conformer_check(process, args.raw_root)

    if args.task == 'align_all':
        process = get_prots(args.docked_prot_file)
        random.shuffle(process)
        run_align_all(process, args.raw_root, args.run_path,
                      args.docked_prot_file, args.align_n)

    elif args.task == 'align_group':
        grouped_files = get_conformer_groups(args.align_n, args.target,
                                             args.start, args.protein,
                                             args.raw_root)
        run_align_group(grouped_files, args.index, args.n, args.protein,
                        args.target, args.start, args.raw_root)

    elif args.task == 'align_check':
        process = get_prots(args.docked_prot_file)
        random.shuffle(process)
        run_align_check(process, args.raw_root)

    elif args.task == 'align_combine':
        process = get_prots(args.docked_prot_file)
        random.shuffle(process)
        run_align_combine(process, args.raw_root)

    elif args.task == 'run_search':
        process = get_prots(args.docked_prot_file)
        random.shuffle(process)
        grouped_files = get_grid_groups(args.grid_size, args.grid_n)
        search_system_caller(process, args.raw_root, args.run_path,
                             args.docked_prot_file,
                             args.rotation_search_step_size, args.grid_size,
                             grouped_files)

    elif args.task == 'search':
        grouped_files = get_grid_groups(args.grid_size, args.grid_n)
        run_search(args.protein, args.target, args.start, args.index,
                   args.raw_root, args.get_time, args.rmsd_cutoff,
                   args.rotation_search_step_size, grouped_files[args.index],
                   args.no_prot_h, args.pocket_only)

    elif args.task == 'check_search':
        process = get_prots(args.docked_prot_file)
        random.shuffle(process)
        grouped_files = get_grid_groups(args.grid_size, args.grid_n)
        counter = 0
        unfinished = []
        for protein, target, start in process:
            if counter == 10:
                break
            pair = '{}-to-{}'.format(target, start)
            protein_path = os.path.join(args.raw_root, protein)
            pair_path = os.path.join(protein_path, pair)
            conformer_file = os.path.join(pair_path,
                                          "{}_lig0-out.maegz".format(target))
            conformers = list(structure.StructureReader(conformer_file))
            if len(conformers) == 1:
                continue
            else:
                counter += 1
            save_folder = os.path.join(
                os.getcwd(), 'decoy_timing_data',
                '{}_{}-to-{}'.format(protein, target, start))
            for i in range(len(grouped_files)):
                if not os.path.exists(
                        os.path.join(save_folder, '{}.csv'.format(i))):
                    unfinished.append((protein, target, start, i))
        print("Missing:", len(unfinished))
        print(unfinished)

    elif args.task == 'test_search':
        run_test_search(args.protein, args.target, args.start, args.raw_root,
                        args.rmsd_cutoff, args.rotation_search_step_size,
                        pair_path, args.no_prot_h, args.pocket_only,
                        args.get_time)

    elif args.task == 'get_grid_data':
        process = get_prots(args.docked_prot_file)
        random.shuffle(process)
        grouped_files = get_grid_groups(args.grid_size, args.grid_n)
        get_data(process, grouped_files, args.raw_root, args.grid_size)

    elif args.task == 'combine_search_data':
        process = get_prots(args.docked_prot_file)
        random.shuffle(process)
        grouped_files = get_grid_groups(args.grid_size, args.grid_n)
        get_data(process, grouped_files, args.raw_root, args.grid_size, True)

    elif args.task == 'get_dist':
        process = get_prots(args.docked_prot_file)
        random.shuffle(process)
        counter = 0
        for protein, target, start in process:
            if counter == 10:
                break
            pair = '{}-to-{}'.format(target, start)
            protein_path = os.path.join(args.raw_root, protein)
            pair_path = os.path.join(protein_path, pair)
            conformer_file = os.path.join(pair_path,
                                          "{}_lig0-out.maegz".format(target))
            conformers = list(structure.StructureReader(conformer_file))
            if len(conformers) == 1:
                continue
            else:
                counter += 1
            start_lig_file = os.path.join(pair_path,
                                          '{}_lig.mae'.format(start))
            start_lig = list(structure.StructureReader(start_lig_file))[0]
            start_lig_center = list(get_centroid(start_lig))
            target_lig_file = os.path.join(pair_path, 'ligand_poses',
                                           '{}_lig0.mae'.format(target))
            target_lig = list(structure.StructureReader(target_lig_file))[0]
            target_lig_center = list(get_centroid(target_lig))
            dist = math.sqrt((
                (start_lig_center[0] - target_lig_center[0])**2) + (
                    (start_lig_center[1] - target_lig_center[1])**2) +
                             ((start_lig_center[2] - target_lig_center[2])**2))
            print(protein, target, start, dist)

    elif args.task == 'test_rotate_translate':
        prot_file = os.path.join(pair_path, '{}_prot.mae'.format(args.start))
        schrodinger_prot = list(structure.StructureReader(prot_file))[0]
        custom_prot = list(structure.StructureReader(prot_file))[0]
        translation_vector = np.random.uniform(low=-100, high=100, size=(3))
        transform.translate_structure(schrodinger_prot, translation_vector[0],
                                      translation_vector[1],
                                      translation_vector[2])
        translate_structure(custom_prot, translation_vector[0],
                            translation_vector[1], translation_vector[2])
        schrodinger_atoms = np.array(schrodinger_prot.getXYZ(copy=False))
        custom_atoms = np.array(custom_prot.getXYZ(copy=False))
        if np.array_equal(schrodinger_atoms, custom_atoms):
            print("Translate function works properly")
        else:
            print("Error in translate function")

        schrodinger_prot = list(structure.StructureReader(prot_file))[0]
        custom_prot = list(structure.StructureReader(prot_file))[0]
        rotation_vector = np.random.uniform(low=-2 * np.pi,
                                            high=2 * np.pi,
                                            size=(3))
        rotation_center = np.random.uniform(low=-100, high=100, size=(3))
        rotation_center = [
            rotation_center[0], rotation_center[1], rotation_center[2]
        ]
        transform.rotate_structure(schrodinger_prot, rotation_vector[0],
                                   rotation_vector[1], rotation_vector[2],
                                   rotation_center)
        coords = rotate_structure(custom_prot.getXYZ(copy=False),
                                  rotation_vector[0], rotation_vector[1],
                                  rotation_vector[2], rotation_center)
        custom_prot.setXYZ(coords)
        schrodinger_atoms = np.array(schrodinger_prot.getXYZ(copy=False))
        custom_atoms = np.array(custom_prot.getXYZ(copy=False))
        if np.amax(np.absolute(schrodinger_atoms - custom_atoms)) < 10**-7:
            print("Rotate function works properly")
        else:
            print("Error in rotate function")

    elif args.task == 'get_rmsd':
        conformer_file = os.path.join(
            pair_path,
            "aligned_to_start_without_hydrogen_conformers.mae".format(
                args.target))
        conformers = list(structure.StructureReader(conformer_file))

        target_lig_file = os.path.join(pair_path, 'ligand_poses',
                                       '{}_lig0.mae'.format(args.target))
        target_lig = list(structure.StructureReader(target_lig_file))[0]
        build.delete_hydrogens(target_lig)
        start_lig_file = os.path.join(pair_path,
                                      '{}_lig.mae'.format(args.start))
        start_lig = list(structure.StructureReader(start_lig_file))[0]
        start_lig_center = list(get_centroid(start_lig))

        rmsds = []
        for i, conformer in tqdm(enumerate(conformers),
                                 desc='going through conformers'):
            conformer_center = list(get_centroid(conformer))
            translate_structure(conformer,
                                start_lig_center[0] - conformer_center[0],
                                start_lig_center[1] - conformer_center[1],
                                start_lig_center[2] - conformer_center[2])
            rmsds.append(
                (conformer,
                 rmsd.calculate_in_place_rmsd(conformer,
                                              conformer.getAtomIndices(),
                                              target_lig,
                                              target_lig.getAtomIndices()), i))

        # best_match_conformer = min(rmsds, key=lambda x: x[1])
        # print(best_match_conformer[1], best_match_conformer[2])
        # file = os.path.join(pair_path, 'best_match_conformer.mae')
        # with structure.StructureWriter(file) as best_match:
        #     best_match.append(best_match_conformer[0])
        print(rmsds[248][1], rmsds[248][2])
        file = os.path.join(pair_path, 'translated_conformer_248.mae')
        with structure.StructureWriter(file) as best_match:
            best_match.append(rmsds[248][0])

    elif args.task == 'check_rotation':
        target_lig_file = os.path.join(pair_path, 'ligand_poses',
                                       '{}_lig0.mae'.format(args.target))
        target_lig = list(structure.StructureReader(target_lig_file))[0]
        remove = [i for i in target_lig.getAtomIndices() if i != 1]
        target_lig.deleteAtoms(remove)
        center = list(get_centroid(target_lig))
        print("ROTATE 5,5,5")
        rotate_structure(target_lig, math.radians(5), math.radians(5),
                         math.radians(5), center)

        target_lig_2 = list(structure.StructureReader(target_lig_file))[0]
        target_lig_2.deleteAtoms(remove)
        center = list(get_centroid(target_lig_2))
        print("ROTATE 5,0,0")
        rotate_structure(target_lig_2, math.radians(5), 0, 0, center)
        print("ROTATE 0,5,0")
        rotate_structure(target_lig_2, 0, math.radians(5), 0, center)
        print("ROTATE 0,0,5")
        rotate_structure(target_lig_2, 0, 0, math.radians(5), center)

        print(
            rmsd.calculate_in_place_rmsd(target_lig,
                                         target_lig.getAtomIndices(),
                                         target_lig_2,
                                         target_lig_2.getAtomIndices()))
        print(target_lig.getXYZ(copy=False))
        print(target_lig_2.getXYZ(copy=False))
Beispiel #18
0
        # Write one batch script for this group: for every accepted receptor
        # file it changes into that receptor's output directory and runs Glide
        # on the generated `.in` file. The script is then submitted via sbatch.
        with open('{}/run/grid{}_in.sh'.format(save, i), 'w') as f:
            # The interpreter line belongs exactly once, at the top of the
            # script, rather than once per receptor entry.
            f.write('#!/bin/bash\n')

            for s_file in group:
                # Names that are not exactly 16 characters long are skipped.
                # Check this before touching the filesystem so that skipped
                # entries do not leave an empty directory or an empty .in file.
                if len(s_file) != 16:
                    continue

                out_f = s_file[:12]
                os.makedirs('{}/{}'.format(save, out_f), exist_ok=True)

                # The grid is centred on the centroid of the ligand file whose
                # name starts with the first four characters of the receptor
                # file name. Read it before opening the .in file so a failed
                # read does not leave a truncated input file behind.
                s = next(StructureReader(ligands + s_file[:4] + '_lig.mae'))
                x, y, z = get_centroid(s)[:3]

                with open('{}/{}/{}.in'.format(save, out_f, out_f),
                          'w') as f_in:
                    f_in.write('GRID_CENTER {},{},{}\n'.format(x, y, z))
                    f_in.write('GRIDFILE {}.zip\n'.format(out_f))
                    f_in.write('INNERBOX 15,15,15\n')
                    f_in.write('OUTERBOX 30,30,30\n')
                    f_in.write('RECEP_FILE {}/{}\n'.format(root, s_file))

                f.write('cd {}/{}\n'.format(save, out_f))
                f.write('$SCHRODINGER/glide -WAIT {}.in\n'.format(out_f))

        # sbatch is invoked from the run directory because both the script name
        # and the -o log path are given relative to it.
        # NOTE(review): os.chdir changes the working directory for the whole
        # process; if `save` is a relative path and this code runs more than
        # once, later paths built from `save` would resolve differently --
        # confirm `save` is absolute.
        os.chdir('{}/run'.format(save))
        os.system(
            'sbatch -p owners -t 02:00:00 -o grid{}.out grid{}_in.sh'.format(
                i, i))