Exemple #1
0
 def test_read_text(self):
     """Parse the generated test text file and compare it to the reference.

     Only the keys present in the reference data returned by
     ``self.get_parsed_test_data()`` are checked; any additional keys in
     the parsed result are ignored.
     """
     txt_path = self.make_test_txt()
     parsed = ReadText().parse_txt(txt_path)
     expected = self.get_parsed_test_data()
     for field in expected.keys():
         self.assertEqual(parsed[field], expected[field])
Exemple #2
0
    def run_scientific_protein_prep(self, challenge_data_path,
                                    pdb_protein_path, working_folder):
        """Stage challenge data in ``working_folder`` and prep each candidate.

        The method works in two passes:

        1. For every target directory reported by
           ``ChallengeData.get_targets()`` whose name is exactly four
           characters long, a directory of the same name is created under
           ``working_folder`` and the target's ``<id>.txt``, ``center.txt``
           and candidate ``*-<id>_*.pdb`` files (except those containing
           ``lig.pdb``) are copied into it.
        2. For every staged candidate, a per-candidate directory is created,
           ``self.receptor_scientific_prep`` is called inside it, and the
           resulting file is copied up into the target directory if it exists
           and is non-empty.  Failures are logged and the candidate skipped.

        The process working directory is changed repeatedly; once the
        challenge data has been validated it is left at ``working_folder``.

        :param challenge_data_path: path to the challenge data directory
                                    (made absolute before use)
        :param pdb_protein_path: not used by this method
        :param working_folder: directory in which target directories are
                               created
        :returns: ``False`` if the challenge data directory is not valid,
                  otherwise ``None``
        """
        abs_challenge_data_path = os.path.abspath(challenge_data_path)
        chal_data_obj = ChallengeData(abs_challenge_data_path)
        if not chal_data_obj.is_valid_for_celpp():
            logging.info(
                '%s is not a valid CELPP challenge data directory. Unable to run protein prep.'
                % (abs_challenge_data_path))
            return False
        week_chal_data_dict = chal_data_obj.get_targets()
        # list(...) keeps this working on Python 3, where keys() returns a
        # view object that cannot be indexed.
        week_name = list(week_chal_data_dict.keys())[0]
        abs_week_path = os.path.join(abs_challenge_data_path, week_name)
        pot_target_dirs = week_chal_data_dict[week_name]
        os.chdir(working_folder)
        current_dir_layer_1 = os.getcwd()

        ## Get all potential target directories and candidates within
        valid_candidates = {}

        # Ensure that the challenge data targets are valid and copy in files
        for pot_target_dir in pot_target_dirs:
            os.chdir(current_dir_layer_1)
            pot_target_id = os.path.basename(pot_target_dir.strip('/'))
            # Does it look like a pdb id?
            if len(pot_target_id) != 4:
                logging.info(
                    'Filtering potential target directories: %s is not 4 characters long. Skipping'
                    % (pot_target_id))
                continue

            os.mkdir(pot_target_id)

            valid_candidates[pot_target_id] = []
            target_dir_path = os.path.join(abs_week_path, pot_target_id)

            # Copy in <targ id>.txt file
            targ_info_basename = pot_target_id + '.txt'
            targ_info_file = os.path.join(target_dir_path, targ_info_basename)
            targ_info_dest = os.path.join(pot_target_id, targ_info_basename)
            shutil.copyfile(targ_info_file, targ_info_dest)

            # Copy in center.txt file
            center_file = os.path.join(target_dir_path, 'center.txt')
            center_file_dest = os.path.join(pot_target_id, 'center.txt')
            shutil.copyfile(center_file, center_file_dest)

            # Copy in each valid candidate
            for candidate_file in glob.glob('%s/*-%s_*.pdb' %
                                            (target_dir_path, pot_target_id)):
                # The LMCSS ligand will be in a pdb file called something like celpp_week19_2016/1fcz/LMCSS-1fcz_1fcz-156-lig.pdb
                # We want to make sure we don't treat this like a receptor
                if 'lig.pdb' in candidate_file:
                    continue
                candidate_file_basename = os.path.basename(candidate_file)
                candidate_file_dest = os.path.join(pot_target_id,
                                                   candidate_file_basename)
                shutil.copyfile(candidate_file, candidate_file_dest)
                valid_candidates[pot_target_id].append(
                    candidate_file_basename)

        for target_id in valid_candidates:
            os.chdir(current_dir_layer_1)
            os.chdir(target_id)
            current_dir_layer_2 = os.getcwd()
            ReadText_obj = ReadText()
            targ_info_dict = ReadText_obj.parse_txt(target_id + '.txt')

            for candidate_filename in valid_candidates[target_id]:
                # Every iteration starts from the target directory, even if
                # the previous candidate bailed out from its prep directory.
                os.chdir(current_dir_layer_2)
                ## Parse the candidate name
                ## Get the method type, target, and candidate info from the filename
                # for example, 'hiResApo-5hib_2eb2.pdb' yields
                # [('hiResApo', '5hib', '2eb2', '')]
                parsed_name = re.findall(
                    r'([a-zA-Z0-9]+)-([a-zA-Z0-9]+)_([a-zA-Z0-9]+)-?([a-zA-Z0-9]*)\.pdb',
                    candidate_filename)
                if len(parsed_name) != 1:
                    logging.info(
                        'Failed to parse docked structure name "%s". Parsing yielded %r'
                        % (candidate_filename, parsed_name))
                    continue
                # The fourth group (optional trailing token) is not needed to
                # build the candidate prefix.
                (candidate_structure_type, candidate_structure_target,
                 candidate_structure_candidate, _) = parsed_name[0]

                candidate_prefix = '%s-%s_%s' % (candidate_structure_type,
                                                 candidate_structure_target,
                                                 candidate_structure_candidate)
                # Make candidate prep directory
                os.mkdir(candidate_prefix)
                # Copy in raw candidate file
                candidate_copy_dest = os.path.join(candidate_prefix,
                                                   candidate_filename)
                shutil.copyfile(candidate_filename, candidate_copy_dest)
                # Copy in center file
                center_copy_dest = os.path.join(candidate_prefix, 'center.txt')
                shutil.copyfile('center.txt', center_copy_dest)
                # Move into candidate prep directory
                os.chdir(candidate_prefix)

                # Run prep
                prepared_protein_file = "%s_prepared%s" % (
                    candidate_prefix, ProteinPrep.OUTPUT_PROTEIN_SUFFIX)

                try:
                    preparation_result = self.receptor_scientific_prep(
                        candidate_filename,
                        prepared_protein_file,
                        targ_info_dict=targ_info_dict)
                except Exception:
                    # Best effort: a failure in user-supplied prep code must
                    # not abort the remaining candidates.  Only Exception is
                    # caught so KeyboardInterrupt/SystemExit still propagate.
                    logging.info(traceback.format_exc())
                    logging.info(
                        'try/except statement caught error in scientific protein prep. Skipping candidate %s'
                        % (candidate_prefix))
                    continue

                if preparation_result == False:
                    logging.info("Unable to prepare this protein:%s" %
                                 (candidate_filename))
                    continue
                if not os.path.exists(prepared_protein_file):
                    logging.info(
                        'Expected output file %s does not exist after protein preparation. Assuming that protein prep failed. Skipping candidate %s'
                        % (prepared_protein_file, candidate_prefix))
                    continue
                if os.path.getsize(prepared_protein_file) == 0:
                    logging.info(
                        'Expected output file %s has size 0. Assuming that protein prep failed. Skipping candidate %s'
                        % (prepared_protein_file, candidate_prefix))
                    continue

                # Publish the prepared receptor one level up, in the target
                # directory.
                prepared_receptor_dest = os.path.join(current_dir_layer_2,
                                                      prepared_protein_file)
                shutil.copyfile(prepared_protein_file,
                                prepared_receptor_dest)

                logging.info("Successfully prepared this protein:%s" %
                             (prepared_protein_file))

            os.chdir(current_dir_layer_1)
Exemple #3
0
    def run_scientific_ligand_prep(self, challenge_data_path, pdb_protein_path,
                                   working_folder):
        """Stage ligand inputs in ``working_folder`` and prep each ligand.

        For every target directory reported by
        ``ChallengeData.get_targets()`` whose name is exactly four characters
        long, a directory of the same name is created under
        ``working_folder`` and the target's ``<id>.txt`` file is copied in.
        The target is kept only if exactly one ``lig_*.smi`` file is found,
        which is copied in as well.  ``self.ligand_scientific_prep`` is then
        called from inside each kept target directory; failures and missing
        or empty output files are logged and the target is skipped.

        The process working directory is changed while this method runs.

        :param challenge_data_path: path to the challenge data directory
                                    (made absolute before use)
        :param pdb_protein_path: not used by this method
        :param working_folder: directory in which target directories are
                               created
        :returns: ``False`` if the challenge data directory is not valid,
                  otherwise ``None``
        """
        abs_challenge_data_path = os.path.abspath(challenge_data_path)
        chal_data_obj = ChallengeData(abs_challenge_data_path)
        if not chal_data_obj.is_valid_for_celpp():
            logging.info(
                '%s is not a valid CELPP challenge data directory. Unable to run ligand prep.'
                % (abs_challenge_data_path))
            return False
        week_chal_data_dict = chal_data_obj.get_targets()
        # list(...) keeps this working on Python 3, where keys() returns a
        # view object that cannot be indexed.
        week_name = list(week_chal_data_dict.keys())[0]
        abs_week_path = os.path.join(abs_challenge_data_path, week_name)
        pot_target_dirs = week_chal_data_dict[week_name]
        os.chdir(working_folder)
        current_dir_layer_1 = os.getcwd()

        ## Get all potential target directories and candidates within
        # Maps target id -> basename of its staged smiles file.
        valid_targets = {}

        # Ensure that the challengedata targets are valid and copy in files
        for pot_target_dir in pot_target_dirs:
            os.chdir(current_dir_layer_1)
            pot_targ_id = os.path.basename(pot_target_dir.strip('/'))

            # Does it look like a pdb id?
            if len(pot_targ_id) != 4:
                logging.info(
                    'Filtering potential target directories: %s is not 4 characters long. Skipping'
                    % (pot_targ_id))
                continue
            os.mkdir(pot_targ_id)
            target_dir_path = os.path.join(abs_week_path, pot_targ_id)

            # Copy in <targ id>.txt file
            targ_info_basename = pot_targ_id + '.txt'
            origin_txt_file = os.path.join(target_dir_path, targ_info_basename)
            dest_txt_file = os.path.join(pot_targ_id, targ_info_basename)
            shutil.copyfile(origin_txt_file, dest_txt_file)

            # Pull in the ligand inchi/smiles
            lig_smiles_files = glob.glob('%s/lig_*.smi' % (target_dir_path))
            if len(lig_smiles_files) != 1:
                logging.info(
                    'Unable to find unambiguous ligand smiles for %s - glob returned %r'
                    % (pot_targ_id, lig_smiles_files))
                continue
            lig_smiles_file = lig_smiles_files[0]
            local_smiles_file = os.path.basename(lig_smiles_file)
            dest_smiles_file = os.path.join(pot_targ_id, local_smiles_file)
            shutil.copyfile(lig_smiles_file, dest_smiles_file)

            valid_targets[pot_targ_id] = local_smiles_file

        for target_id in valid_targets:
            os.chdir(current_dir_layer_1)
            os.chdir(target_id)
            smiles_filename = valid_targets[target_id]

            # Parse the <targ id>.txt file
            ReadText_obj = ReadText()
            targ_info_dict = ReadText_obj.parse_txt(target_id + '.txt')

            # Prepare the ligand
            lig_prefix = smiles_filename.replace('.smi', '')
            prepared_lig_file = '%s_prepared%s' % (
                lig_prefix, LigandPrep.OUTPUT_LIG_SUFFIX)
            try:
                lig_prep_result = self.ligand_scientific_prep(
                    smiles_filename,
                    prepared_lig_file,
                    targ_info_dict=targ_info_dict)
            except Exception:
                # Best effort: a failure in user-supplied prep code must not
                # abort the remaining targets.  Only Exception is caught so
                # KeyboardInterrupt/SystemExit still propagate.
                logging.info(sys.exc_info())
                logging.info(
                    "try/except caught error in ligand_scientific_prep function.  Skipping target %s"
                    % (target_id))
                continue
            if lig_prep_result == False:
                logging.info(
                    "Unable to prepare the ligand for this target protein: %s. Skipping"
                    % (target_id))
                continue

            if not os.path.exists(prepared_lig_file):
                logging.info(
                    'Expected output file %s does not exist. Assuming that ligand prep failed. Skipping target %s'
                    % (prepared_lig_file, target_id))
                continue
            if os.path.getsize(prepared_lig_file) == 0:
                logging.info(
                    'Expected output file %s has size 0. Assuming that ligand prep failed. Skipping candidate %s'
                    % (prepared_lig_file, target_id))
                continue

            logging.info("Successfully prepared ligand %s for target %s" %
                         (lig_prefix, target_id))
Exemple #4
0
    def run_dock(self, prot_sci_prep_dir, lig_sci_prep_dir, dock_dir):
        """Run technical prep and docking for every usable target/candidate.

        The method proceeds in three stages:

        1. Discovery: every four-character sub-directory of
           ``prot_sci_prep_dir`` is treated as a target.  A target is usable
           only if a pocket center, a single scientifically prepared ligand
           and a parseable ligand filename are found, and at least one
           prepared candidate protein matches
           ``*-????_????<Dock.SCI_PREPPED_PROT_SUFFIX>``.
        2. Ligand technical prep: for each usable target (in sorted order) a
           directory is created under ``dock_dir`` and
           ``self.ligand_technical_prep`` is run in a sub-directory.
        3. Candidate technical prep and docking: for each candidate,
           ``self.receptor_technical_prep`` and then ``self.dock`` are run in
           their own sub-directories; on success the docked receptor and
           ligand files are copied up into the target's docking directory.

        Any failure is logged and causes the affected target or candidate to
        be skipped; exceptions raised by the user-defined hooks are caught.
        The process working directory is changed while this method runs.

        :param prot_sci_prep_dir: directory holding per-target scientific
                                  protein prep results
        :param lig_sci_prep_dir: directory holding per-target scientific
                                 ligand prep results
        :param dock_dir: directory in which docking directories are created
        :returns: ``None``
        """
        abs_lig_sci_prep_dir = os.path.abspath(lig_sci_prep_dir)
        abs_prot_sci_prep_dir = os.path.abspath(prot_sci_prep_dir)
        abs_dock_dir = os.path.abspath(dock_dir)

        targ_prot_prep_dirs = glob.glob('%s/????' % (abs_prot_sci_prep_dir))

        targ_dic = {}

        for targ_prot_prep_dir in targ_prot_prep_dirs:
            targ_name = os.path.basename(targ_prot_prep_dir.rstrip('/'))
            targ_lig_prep_dir = os.path.join(abs_lig_sci_prep_dir, targ_name)
            logging.info(
                "============= Starting to process target:%s =============" %
                targ_name)
            targ_dic[targ_name] = {}
            # Until we find a good ligand and cand receptor, mark this as invalid
            targ_dic[targ_name]['valid_targ'] = False

            # Get the binding pocket center
            pocket_center = self.get_pocket_center(targ_prot_prep_dir)
            if pocket_center == False:
                logging.info(
                    'Failed to find pocket center file in dirctory %s. Skipping target %s.'
                    % (targ_prot_prep_dir, targ_name))
                continue

            # Get the ligand name in this directory
            sci_prepped_lig_file = self.get_sci_prepped_lig(
                targ_lig_prep_dir, Dock.SCI_PREPPED_LIG_SUFFIX)
            if sci_prepped_lig_file == False:
                logging.info(
                    'Unable to find single ligand for target dir %s. Skipping target %s.'
                    % (targ_lig_prep_dir, targ_name))
                continue

            # Process ligand file names
            lig_prefix = self.parse_lig_filename(sci_prepped_lig_file)
            if lig_prefix == False:
                logging.info(
                    'Unable to parse ligand filename %s. Skipping target %s.' %
                    (sci_prepped_lig_file, targ_name))
                # Actually skip the target, as the log message promises.
                continue

            # Get the cand protein names in this directory
            potential_cand_proteins = glob.glob(
                '%s/*-????_????%s' %
                (targ_prot_prep_dir, Dock.SCI_PREPPED_PROT_SUFFIX))

            # Process potential cand protein names
            for potential_cand_protein in potential_cand_proteins:
                potential_cand_basename = os.path.basename(
                    potential_cand_protein)
                category, targ_id, cand_id = self.parse_cand_name(
                    potential_cand_basename)
                targ_txt_file = os.path.join(targ_lig_prep_dir,
                                             targ_id + '.txt')
                # If this is the first valid cand for this target, make a new dictionary key and mark the target as valid
                if 'valid_cands' not in targ_dic[targ_name]:
                    targ_dic[targ_name]['valid_targ'] = True
                    targ_dic[targ_name]['prot_prep_dir'] = targ_prot_prep_dir
                    targ_dic[targ_name]['lig_prep_dir'] = targ_lig_prep_dir
                    targ_dic[targ_name]['pocket_center'] = pocket_center
                    targ_dic[targ_name]['targ_txt_file'] = targ_txt_file
                    targ_dic[targ_name]['lig_file'] = sci_prepped_lig_file
                    targ_dic[targ_name]['lig_prefix'] = lig_prefix
                    targ_dic[targ_name]['valid_cands'] = []
                targ_dic[targ_name]['valid_cands'].append(
                    (potential_cand_protein, category, targ_id, cand_id))

            os.chdir(abs_dock_dir)

        # Print out all of the target/cands we will dock to
        logging.info('targ_dic is: ')
        for targ in targ_dic:
            logging.info('%s: %s' % (targ, targ_dic[targ]))

        # Generate a sorted list of targets for docking
        targ_names = sorted(
            name for name in targ_dic if targ_dic[name]['valid_targ'])

        # Begin populating the target directory
        os.chdir(abs_dock_dir)
        for targ_name in targ_names:
            pocket_center = targ_dic[targ_name]['pocket_center']
            # Each target starts from the docking root, even if the previous
            # target bailed out from inside one of its sub-directories.
            os.chdir(abs_dock_dir)
            # Make the target directory
            os.mkdir(targ_name)
            os.chdir(targ_name)
            abs_targ_dock_dir = os.getcwd()

            # Copy the targ.txt file in
            copy_dest = '%s/%s' % (abs_targ_dock_dir,
                                   os.path.basename(
                                       targ_dic[targ_name]['targ_txt_file']))
            shutil.copyfile(targ_dic[targ_name]['targ_txt_file'], copy_dest)
            # Parse the targ.txt file
            ReadText_obj = ReadText()
            targ_info_dict = ReadText_obj.parse_txt(copy_dest)

            #### Run CELPPade technical prep

            ### Ligand technical prep

            ## Ligand technical prep setup
            lig_tech_prep_dir = 'lig_%s_tech_prep' % (
                targ_dic[targ_name]['lig_prefix'])
            os.mkdir(lig_tech_prep_dir)

            # Copy the sci prepped ligand in
            lig_base_filename = os.path.basename(
                targ_dic[targ_name]['lig_file'])
            copy_dest = '%s/%s' % (lig_tech_prep_dir, lig_base_filename)
            shutil.copyfile(targ_dic[targ_name]['lig_file'], copy_dest)

            os.chdir(lig_tech_prep_dir)

            ## Call user-defined ligand technical prep
            try:
                tech_prepped_lig_file_list = self.ligand_technical_prep(
                    lig_base_filename, targ_info_dict=targ_info_dict)
            except Exception:
                # Only Exception is caught so KeyboardInterrupt/SystemExit
                # still propagate.
                logging.info(sys.exc_info())
                logging.info(
                    'try/except statement caught error in function lig_technical_prep on %s. Skipping target %s.'
                    % (os.path.abspath(lig_base_filename), targ_name))
                continue

            ## Ensure that ligand technical prep was successful
            # Check for function-reported failure
            if tech_prepped_lig_file_list == False:
                logging.info(
                    'Technical ligand preparation failed on %s. Skipping target %s.'
                    % (os.path.abspath(lig_base_filename), targ_name))
                continue

            # Ensure that ligand technical prep returns a list of filenames
            if not (type(tech_prepped_lig_file_list) is list):
                logging.info(
                    'Technical ligand preparation for %s did not return a list of filenames. Skipping target %s.'
                    % (os.path.abspath(lig_base_filename), targ_name))
                continue

            # Ensure that all files in list really exist.  The missing files
            # are collected first so that ``continue`` below applies to the
            # target loop rather than to a file-checking loop.
            missing_lig_files = [
                filename for filename in tech_prepped_lig_file_list
                if not os.path.exists(filename)
            ]
            if missing_lig_files:
                for filename in missing_lig_files:
                    logging.info(
                        'Technical ligand preparation for %s returned file list %r, but file %s does not exist. Skipping target %s.'
                        % (os.path.abspath(lig_base_filename),
                           tech_prepped_lig_file_list, filename, targ_name))
                continue

            ## Prepare to copy these files for later
            tech_prepped_lig_file_list = [
                os.path.abspath(i) for i in tech_prepped_lig_file_list
            ]
            targ_dic[targ_name][
                'tech_prepped_lig_files'] = tech_prepped_lig_file_list

            logging.info(
                'Technical ligand prep successful for %s. All files exist from returned list %r. '
                % (os.path.abspath(lig_base_filename),
                   tech_prepped_lig_file_list))

            ### Candidate tech prep
            for cand_file, category, targ_id, cand_id in targ_dic[targ_name][
                    'valid_cands']:
                os.chdir(abs_targ_dock_dir)

                ## Candidate tech prep setup
                cand_tech_prep_dir = '%s_%s_tech_prep/' % (category, cand_id)
                os.mkdir(cand_tech_prep_dir)
                prot_base_filename = os.path.basename(cand_file)
                copy_dest = '%s/%s' % (cand_tech_prep_dir, prot_base_filename)
                shutil.copyfile(cand_file, copy_dest)
                os.chdir(cand_tech_prep_dir)

                ## Call user-defined protein technical prep
                try:
                    tech_prepped_prot_file_list = self.receptor_technical_prep(
                        prot_base_filename,
                        pocket_center,
                        targ_info_dict=targ_info_dict)
                except Exception:
                    logging.info(sys.exc_info())
                    logging.info(
                        'try/except statement caught error in function receptor_technical_prep on %s. Skipping candidate %s for target %s.'
                        % (os.path.abspath(prot_base_filename), cand_id,
                           targ_name))
                    continue

                ## Ensure that receptor technical prep was successful
                # Check for function-reported failure
                if tech_prepped_prot_file_list == False:
                    logging.info(
                        'Technical protein preparation failed on %s. Skipping candidate %s for target %s.'
                        % (os.path.abspath(prot_base_filename), cand_id,
                           targ_name))
                    continue

                # Ensure that receptor technical prep returns a list of filenames
                if not (type(tech_prepped_prot_file_list) is list):
                    logging.info(
                        'Technical protein preparation for %s did not return a list of filenames. Skipping candidate %s for target %s.'
                        % (os.path.abspath(prot_base_filename), cand_id,
                           targ_name))
                    continue

                # Ensure that all files in list really exist (same pattern as
                # the ligand check: collect first, then skip the candidate).
                missing_prot_files = [
                    filename for filename in tech_prepped_prot_file_list
                    if not os.path.exists(filename)
                ]
                if missing_prot_files:
                    for filename in missing_prot_files:
                        logging.info(
                            'Technical protein preparation for %s returned file list %r, but file %s does not exist. Skipping candidate %s for target %s.'
                            % (os.path.abspath(prot_base_filename),
                               tech_prepped_prot_file_list, filename, cand_id,
                               targ_name))
                    continue

                ## Prepare to copy these files for docking step
                tech_prepped_prot_file_list = [
                    os.path.abspath(i) for i in tech_prepped_prot_file_list
                ]

                logging.info(
                    'Protein technical prep successful for %s. All files exist from returned list %r.'
                    % (cand_file, tech_prepped_prot_file_list))

                #### Run CELPPade docking

                os.chdir(abs_targ_dock_dir)
                cand_dock_dir = '%s_%s_docking' % (category, cand_id)
                os.mkdir(cand_dock_dir)
                os.chdir(cand_dock_dir)

                ## Prepare expected file names
                output_receptor_pdb = '%s-%s_%s_docked.pdb' % (
                    category, targ_id, cand_id)
                output_lig_mol = '%s-%s_%s_docked.mol' % (category, targ_id,
                                                          cand_id)

                ## Copy in tech prepped files
                for filename in tech_prepped_lig_file_list:
                    file_base_name = os.path.basename(filename)
                    shutil.copyfile(filename, file_base_name)
                for filename in tech_prepped_prot_file_list:
                    file_base_name = os.path.basename(filename)
                    shutil.copyfile(filename, file_base_name)

                ## Do the actual docking
                try:
                    dock_results = self.dock(tech_prepped_lig_file_list,
                                             tech_prepped_prot_file_list,
                                             output_receptor_pdb,
                                             output_lig_mol,
                                             targ_info_dict=targ_info_dict)
                except Exception:
                    logging.info(sys.exc_info())
                    logging.info(
                        'try/except statement caught error in dock() function. Docking was given '
                        'inputs tech_prepped_lig_file_list=%r tech_prepped_prot_file_list=%r '
                        'output_receptor_pdb=%r output_lig_mol=%r. Skipping docking to this '
                        'candidate.' % (tech_prepped_lig_file_list,
                                        tech_prepped_prot_file_list,
                                        output_receptor_pdb, output_lig_mol))
                    continue

                ## Check for success
                # Check for self-reported failure
                if dock_results == False:
                    logging.info(
                        'Docking returned False given inputs: tech_prepped_lig_file_list=%r   '
                        'tech_prepped_prot_file_list=%r    output_receptor_pdb=%r     output_lig_mol=%r. '
                        'Skipping docking to this candidate.' %
                        (tech_prepped_lig_file_list,
                         tech_prepped_prot_file_list, output_receptor_pdb,
                         output_lig_mol))
                    continue
                # Ensure that correct output files exist
                if not (os.path.exists(output_receptor_pdb)) or (
                        os.path.getsize(output_receptor_pdb) == 0):
                    logging.info(
                        'Docking did not create receptor pdb file %s given inputs:   '
                        'tech_prepped_lig_file_list=%r   tech_prepped_prot_file_list=%r    '
                        'output_receptor_pdb=%r     output_lig_mol=%r. Skipping docking '
                        'to this candidate.' %
                        (output_receptor_pdb, tech_prepped_lig_file_list,
                         tech_prepped_prot_file_list, output_receptor_pdb,
                         output_lig_mol))
                    continue
                if not (os.path.exists(output_lig_mol)) or (
                        os.path.getsize(output_lig_mol) == 0):
                    logging.info(
                        'Docking did not create ligand mol file %s given inputs:   '
                        'tech_prepped_lig_file_list=%r   tech_prepped_prot_file_list=%r    '
                        'output_receptor_pdb=%r     output_lig_mol=%r. Skipping docking '
                        'to this candidate.' %
                        (output_lig_mol, tech_prepped_lig_file_list,
                         tech_prepped_prot_file_list, output_receptor_pdb,
                         output_lig_mol))
                    continue

                logging.info(
                    'Docking was successful for %s. Final receptor and ligand '
                    'files %s and %s exist and are nonzero size.' %
                    (cand_file, output_receptor_pdb, output_lig_mol))

                # Prepare to copy docking results into final result directory
                abs_output_receptor_pdb = os.path.abspath(output_receptor_pdb)
                abs_output_lig_mol = os.path.abspath(output_lig_mol)

                # Copy the files one directory up
                os.chdir(abs_targ_dock_dir)
                shutil.copyfile(abs_output_receptor_pdb, output_receptor_pdb)
                shutil.copyfile(abs_output_lig_mol, output_lig_mol)