def test_pacc_parallel_antiparallel(self):
    """Check the mean PACC layer radius of two reference coiled-coil files.

    ``p3_p4.pdb`` and then ``APH.pdb`` are loaded from
    ``self.test_cc_folder``; for each one the mean of
    ``PACCAnalysis.radii_layers`` must be below 5.1.
    """
    for pdb_name in ('p3_p4.pdb', 'APH.pdb'):
        pdb_path = os.path.join(self.test_cc_folder, pdb_name)
        coiled_coil = ampal.load_pdb(pdb_path)
        cc_analysis = pacc.PACCAnalysis(coiled_coil)
        mean_radius = np.mean(cc_analysis.radii_layers)
        assert mean_radius < 5.1
def pdb_check(self, pdb_file_path):
    """Assert that AMPAL's PDB output matches the coordinate records of a file.

    The ``ATOM``/``HETATM`` lines of the file at ``pdb_file_path`` are
    compared with those produced by ``make_pdb(ligands=True,
    alt_states=True)`` on the loaded structure. Three things must agree:
    the number of records, the tally of characters ``[-4:-2]`` of each
    record, and the tally of columns ``[17:20]`` of each record.
    """
    record_prefixes = ('ATOM', 'HETATM')

    with open(pdb_file_path, 'r') as pdb_handle:
        original_text = pdb_handle.read()
    structure = ampal.load_pdb(pdb_file_path)

    # Record count: the file versus what AMPAL writes back out.
    original_records = [
        line for line in original_text.splitlines()
        if line.startswith(record_prefixes)
    ]
    ampal_text = structure.make_pdb(ligands=True, alt_states=True)
    ampal_records = [
        line for line in ampal_text.splitlines()
        if line.startswith(record_prefixes)
    ]
    self.assertEqual(len(original_records), len(ampal_records))

    # Atomic composition.
    # NOTE(review): [-4:-2] is the element field only if records are full
    # width - confirm for the test files.
    original_elements = Counter(
        line[-4:-2].strip() for line in original_records)
    ampal_elements = Counter(
        line[-4:-2].strip() for line in ampal_records)
    self.assertEqual(original_elements, ampal_elements)

    # Residue composition (columns 17-19 of each record).
    original_residues = Counter(
        line[17:20].strip() for line in original_records)
    ampal_residues = Counter(
        line[17:20].strip() for line in ampal_records)
    self.assertEqual(original_residues, ampal_residues)
def create_biounit_entry(
    pdb_path: Path,
    biounit_num: int,
    pdb_entry: PdbModel,
    is_deposited_pdb: bool,
    preferred_biol_unit: tp.Optional[int],
) -> BiolUnitModel:
    """Build a ``BiolUnitModel`` for a gzipped PDB file and register its states.

    Parameters
    ----------
    pdb_path : Path
        Gzip-compressed PDB file; its name is passed to AMPAL as ``pdb_id``.
    biounit_num : int
        Number stored as ``biol_unit_number``.
    pdb_entry : PdbModel
        Parent entry stored on the new model as ``pdb``.
    is_deposited_pdb : bool
        Stored unchanged on the new model.
    preferred_biol_unit : int, optional
        When given, the new model is flagged as preferred if it equals
        ``biounit_num``; when ``None`` the flag is ``False``.

    Returns
    -------
    BiolUnitModel
        The newly constructed model.
    """
    with gz.open(str(pdb_path)) as compressed:
        pdb_text = compressed.read().decode()
    pdb_ampal = ampal.load_pdb(pdb_text, pdb_id=pdb_path.name, path=False)

    if preferred_biol_unit is None:
        is_preferred_biol_unit = False
    else:
        is_preferred_biol_unit = biounit_num == preferred_biol_unit

    biounit_model = BiolUnitModel(
        biol_unit_number=biounit_num,
        is_deposited_pdb=is_deposited_pdb,
        is_preferred_biol_unit=is_preferred_biol_unit,
        pdb=pdb_entry,
    )

    # A single Assembly is treated as state 0; anything else is iterated as a
    # collection of states.
    if isinstance(pdb_ampal, ampal.Assembly):
        state_assemblies = [pdb_ampal]
    else:
        state_assemblies = pdb_ampal

    # The returned entries are not used further here.
    # NOTE(review): presumably create_state_entry links each entry to
    # biounit_model as a side effect - confirm.
    states = [
        create_state_entry(state_assembly, state_index, biounit_model)
        for state_index, state_assembly in enumerate(state_assemblies)
    ]
    return biounit_model
def test_cb_atom_filter(residue_number: int):
    """Check ``keep_sidechain_cb_atom_filter`` against a label whitelist.

    For every atom of residue ``residue_number`` in the first molecule of
    ``3qy1.pdb``, the filter must return ``True`` exactly when the atom's
    ``res_label`` is one of N, CA, C, O or CB.
    """
    backbone_atoms = ("N", "CA", "C", "O", "CB")
    assembly = ampal.load_pdb(str(TEST_DATA_DIR / "3qy1.pdb"))
    focus_residue = assembly[0][residue_number]
    for atom in focus_residue:
        expected = atom.res_label in backbone_atoms
        observed = cfds.keep_sidechain_cb_atom_filter(atom)
        assert expected == observed, f"Expected {atom.res_label} to return {expected} after filter"
def create_metrics_from_pdb(pdb_string: str) -> DesignMetrics:
    """Calculate the design metrics for a structure given as PDB text.

    Parameters
    ----------
    pdb_string : str
        Contents of a PDB file (not a path).

    Returns
    -------
    DesignMetrics
        Result of ``analyse_design`` on the loaded assembly.

    Raises
    ------
    ValueError
        If the loaded assembly contains no molecules.
    """
    ampal_assembly = ampal.load_pdb(pdb_string, path=False)
    # Multi-state input loads as an AmpalContainer and only its first state is
    # analysed. Unwrap it *before* relabelling so relabel_all() is always
    # called on an Assembly.
    # NOTE(review): AmpalContainer does not appear to expose relabel_all(),
    # which made the previous order fail on multi-model files - confirm
    # against the AMPAL API.
    if isinstance(ampal_assembly, ampal.AmpalContainer):
        ampal_assembly = ampal_assembly[0]
    # relabel everything to remove annoying insertion codes!
    ampal_assembly.relabel_all()
    if not ampal_assembly._molecules:
        raise ValueError("No PDB format data found in file.")
    return analyse_design(ampal_assembly)
def setUp(self):
    """Load three test structures and collect every Polypeptide they contain."""
    pdb_names = ('1ek9.pdb', '2ht0.pdb', '3qy1.pdb')
    pdb_paths = [str(TEST_FILE_FOLDER / pdb_name) for pdb_name in pdb_names]
    structures = [ampal.load_pdb(pdb_path) for pdb_path in pdb_paths]
    # Flatten the structures and keep only the Polypeptide molecules.
    self.test_polypeptides = [
        molecule
        for structure in structures
        for molecule in structure
        if isinstance(molecule, ampal.Polypeptide)
    ]
def setUp(self):
    """Build the two TAPolypeptide dipeptide fixtures and load 1ek9 polypeptides."""
    # The two torsion-angle sets hold the same rows in opposite order.
    self.cis_tas = [[-179, 120, -40], [0, -60, 20]]
    self.cis_dipeptide = isambard.specifications.TAPolypeptide(self.cis_tas)
    self.trans_tas = [[0, -60, 20], [-179, 120, -40]]
    self.trans_dipeptide = isambard.specifications.TAPolypeptide(
        self.trans_tas)

    structure = ampal.load_pdb(str(TEST_FILES_PATH / '1ek9.pdb'))
    polypeptides = []
    for molecule in structure:
        if isinstance(molecule, ampal.Polypeptide):
            polypeptides.append(molecule)
    self.test_polypeptides = polypeptides
def test_create_residue_frame_backbone_only(residue_number):
    """Check residue-plane alignment and the backbone-only frame of one residue.

    Uses residue ``residue_number`` of the first molecule in ``3qy1.pdb``.
    After ``align_to_residue_plane`` the CA must sit on the origin and the
    N/C coordinates checked below must be zero. Every atom accepted by
    ``within_frame`` must lie within the frame's half-diagonal of the origin.
    The discretised frame of the isolated backbone must then contain exactly
    four occupied entries, with CA at the centre voxel.
    """
    structure = ampal.load_pdb(str(TEST_DATA_DIR / "3qy1.pdb"))
    focus_residue = structure[0][residue_number]

    # Make sure that residue correctly aligns peptide plane to XY
    cfds.align_to_residue_plane(focus_residue)
    ca_on_origin = np.array_equal(focus_residue["CA"].array, (0, 0, 0))
    assert ca_on_origin, "The CA atom should lie on the origin."
    nitrogen = focus_residue["N"]
    assert np.isclose(nitrogen.x, 0), "The nitrogen atom should lie on XY."
    assert np.isclose(nitrogen.z, 0), "The nitrogen atom should lie on XY."
    assert np.isclose(focus_residue["C"].z,
                      0), "The carbon atom should lie on XY."

    # Make sure that all relevant atoms are pulled into the frame
    frame_edge_length = 12.0
    voxels_per_side = 21
    centre = voxels_per_side // 2
    # Half the space diagonal of the cubic frame.
    max_dist = np.sqrt(((frame_edge_length / 2)**2) * 3)
    for atom in structure.get_atoms(ligands=False):
        if not cfds.within_frame(frame_edge_length, atom):
            continue
        assert g.distance(atom, (0, 0, 0)) <= max_dist, (
            "All atoms filtered by `within_frame` should be within "
            "`frame_edge_length/2` of the origin")

    # Make sure that aligned residue sits on XY after it is discretized
    backbone_copy = copy.deepcopy(focus_residue).backbone
    single_res_assembly = ampal.Assembly(
        molecules=ampal.Polypeptide(monomers=backbone_copy))
    # Need to reassign the parent so that the residue is the only thing in
    # the assembly
    single_res_assembly[0].parent = single_res_assembly
    single_res_assembly[0][0].parent = single_res_assembly[0]

    # Obtain atom encoder:
    codec = cfds.Codec.CNO()
    frame = cfds.create_residue_frame(
        single_res_assembly[0][0],
        frame_edge_length,
        voxels_per_side,
        encode_cb=False,
        codec=codec,
    )
    np.testing.assert_array_equal(
        frame[centre, centre, centre],
        [True, False, False],
        err_msg="The central atom should be CA.",
    )
    occupied = list(zip(*np.nonzero(frame)))
    assert len(
        occupied) == 4, "There should be only 4 backbone atoms in this frame"
    occupied_on_xy = list(zip(*np.nonzero(frame[:, :, centre])))
    assert 3 <= len(
        occupied_on_xy) <= 4, "N, CA and C should lie on the xy plane."
def pack_side_chains_scwrl(assembly, sequences, rigid_rotamer_model=True,
                           hydrogens=False):
    """Packs side chains onto a protein structure using Scwrl.

    Parameters
    ----------
    assembly : AMPAL Assembly
        AMPAL object containing some protein structure.
    sequences : [str]
        A list of amino acid sequences in single-letter code for Scwrl
        to pack, one per Polypeptide in the assembly.
    rigid_rotamer_model : bool, optional
        If True, Scwrl will use the rigid-rotamer model, which is
        faster but less accurate.
    hydrogens : bool, optional
        If False, the hydrogens produced by Scwrl will be omitted.

    Returns
    -------
    packed_structure : AMPAL Assembly
        A new AMPAL Assembly containing the packed structure, with
        the Scwrl score in the tags.

    Raises
    ------
    ValueError
        If Scwrl is unavailable, or if the sequences do not match the
        Polypeptides of the assembly in total length or in number.
    """
    if not scwrl_available():
        raise ValueError('Scwrl4 is unavailable on your system path.')

    polypeptides = [
        molecule for molecule in assembly
        if isinstance(molecule, ampal.Polypeptide)
    ]
    total_seq_len = sum(map(len, sequences))
    total_aa_len = sum(map(len, polypeptides))
    if total_seq_len != total_aa_len:
        raise ValueError('Total sequence length ({}) does not match '
                         'total Polypeptide length ({}).'.format(
                             total_seq_len, total_aa_len))
    if len(polypeptides) != len(sequences):
        raise ValueError('Number of sequences ({}) does not match '
                         'number of Polypeptides ({}).'.format(
                             len(sequences), len(polypeptides)))

    # Scwrl receives the whole assembly as PDB text plus one joined sequence.
    joined_sequence = ''.join(sequences)
    scwrl_std_out, scwrl_pdb = run_scwrl(
        assembly.pdb,
        joined_sequence,
        path=False,
        rigid_rotamer_model=rigid_rotamer_model,
        hydrogens=hydrogens,
    )
    packed_structure, scwrl_score = parse_scwrl_out(scwrl_std_out, scwrl_pdb)
    new_assembly = ampal.load_pdb(packed_structure, path=False)
    new_assembly.tags['scwrl_score'] = scwrl_score
    return new_assembly
def test_make_frame_dataset_as_gaussian():
    """Check that gaussian-voxel frames in the HDF5 data set match direct ones.

    ``1ubq.pdb`` is loaded and atoms rejected by ``default_atom_filter`` are
    removed. A data set is then written with ``make_frame_dataset`` using
    ``voxels_as_gaussian=True``. For residues 1-76 of chain A, the stored
    array must equal the frame produced by ``create_residue_frame``.
    """
    frame_edge_length = 18.0
    voxels_per_side = 31
    test_file = TEST_DATA_DIR / "1ubq.pdb"

    ampal_1ubq = ampal.load_pdb(str(test_file))
    for atom in ampal_1ubq.get_atoms():
        if cfds.default_atom_filter(atom):
            continue
        del atom.parent.atoms[atom.res_label]
        del atom

    with tempfile.TemporaryDirectory() as tmpdir:
        # Obtain atom encoder:
        codec = cfds.Codec.CNO()
        output_file_path = cfds.make_frame_dataset(
            structure_files=[test_file],
            output_folder=tmpdir,
            name="test_dataset",
            frame_edge_length=frame_edge_length,
            voxels_per_side=voxels_per_side,
            verbosity=1,
            require_confirmation=False,
            codec=codec,
            voxels_as_gaussian=True,
        )
        with h5py.File(output_file_path, "r") as dataset:
            # Residue labels are the strings "1" .. "76".
            for residue_number in map(str, range(1, 77)):
                expected_frame = cfds.create_residue_frame(
                    residue=ampal_1ubq["A"][residue_number],
                    frame_edge_length=frame_edge_length,
                    voxels_per_side=voxels_per_side,
                    encode_cb=False,
                    codec=codec,
                    voxels_as_gaussian=True,
                )
                stored_frame = dataset["1ubq"]["A"][residue_number][()]
                npt.assert_array_equal(
                    stored_frame,
                    expected_frame,
                    err_msg=(
                        "The frame in the HDF5 data set should be the same as the "
                        "input frame."),
                )
def test_pacc_on_ccs(self):
    """Run PACC on residues 1-27 of every chain in each file of ``self.pdb_paths``.

    For each structure the heptad register and the averaged radius, alpha
    and CA layer parameters are passed to ``check_register`` and
    ``check_parameters`` together with ``cc_len``.
    """
    assert len(self.pdb_paths) > 0
    for pdb_path in self.pdb_paths:
        coiled_coil = ampal.load_pdb(pdb_path)
        # Rebuild the assembly from the [1:28] slice of each chain.
        trimmed = ampal.Assembly()
        for helix in coiled_coil:
            trimmed.append(helix[1:28])
        cc_pacc = pacc.PACCAnalysis(trimmed)

        # Check register assignment
        register, _fit = cc_pacc.heptad_register()
        self.check_register(register, cc_pacc.cc_len)

        layer_sets = (
            cc_pacc.radii_layers,
            cc_pacc.alpha_layers,
            cc_pacc.ca_layers,
        )
        for layers in layer_sets:
            averaged = cc_pacc.calc_average_parameters(layers)[0]
            self.check_parameters(averaged, cc_pacc.cc_len)
def check_ampal_contents(self, pdb_path):
    """Compare what a PDB file declares with what AMPAL loads from it.

    Three things must agree between the ``ATOM``/``HETATM`` records of the
    file and the loaded structure: the set of atom serial numbers, the tally
    of residue names (one per distinct chain/residue-number pair in the
    file), and the set of (chain, residue number) pairs that have
    alternate conformations.
    """
    with open(pdb_path, 'r') as inf:
        raw_lines = inf.readlines()

    file_atom_numbers = set()
    file_monomer_labels = {}
    file_alt_conf = set()
    for line in raw_lines:
        if line[:6].strip() not in ('ATOM', 'HETATM'):
            continue
        file_atom_numbers.add(int(line[6:11].strip()))
        monomer_number = int(line[22:26].strip())
        chain = line[21].strip()
        residue_key = (chain, monomer_number)
        file_monomer_labels[residue_key] = line[17:20].strip()
        # A non-blank column 16 marks an alternate location.
        if line[16].strip():
            file_alt_conf.add(residue_key)

    structure = ampal.load_pdb(pdb_path)
    ampal_atom_numbers = {
        atom.id for atom in structure.get_atoms(inc_alt_states=True)
    }
    ampal_monomer_labels = []
    ampal_alt_conf = set()
    for monomer in structure.get_monomers():
        ampal_monomer_labels.append(monomer.mol_code)
        if len(monomer.states) > 1:
            ampal_alt_conf.add((monomer.parent.id, int(monomer.id)))

    self.assertEqual(file_atom_numbers, ampal_atom_numbers)
    self.assertEqual(Counter(file_monomer_labels.values()),
                     Counter(ampal_monomer_labels))
    self.assertEqual(file_alt_conf, ampal_alt_conf)
def setUp(self):
    """Load the 2ht0 and 1ubq test structures and list them in ``self.pdbs``."""
    fixtures = (('ht0', '2ht0.pdb'), ('ubq', '1ubq.pdb'))
    for attribute, pdb_name in fixtures:
        structure = ampal.load_pdb(str(TEST_FILES_PATH / pdb_name))
        setattr(self, attribute, structure)
    self.pdbs = [self.ht0, self.ubq]
import unittest
import pathlib

import ampal
from ampal import analyse_protein

# First molecule of the 2ebo_1.mmol test file, shared by every test below.
_test_file = pathlib.Path(__file__).parent / 'testing_files' / '2ebo_1.mmol'
_test_polypeptide = ampal.load_pdb(_test_file)[0]


class ResiduesPerTurnTestCase(unittest.TestCase):
    """Tests for ``analyse_protein.residues_per_turn`` on the test polypeptide."""

    @classmethod
    def setUpClass(cls):
        """Compute the residues-per-turn values once for the whole class."""
        cls.rpts = list(
            analyse_protein.residues_per_turn(p=_test_polypeptide))

    def test_residues_per_turn_length(self):
        """One value is produced per element of the polypeptide primitive."""
        self.assertEqual(len(self.rpts), len(_test_polypeptide.primitive))

    def test_residues_per_turn_final_value(self):
        """The final value is ``None``."""
        self.assertIsNone(self.rpts[-1])

    def test_residues_per_turn_none_values(self):
        """Exactly one value is ``None``."""
        none_count = sum(1 for value in self.rpts if value is None)
        # The count is an integer, so compare exactly rather than approximately.
        self.assertEqual(none_count, 1)

    def test_residues_per_turn_index_eight(self):
        """The value at index 8 matches the stored reference value."""
        self.assertAlmostEqual(self.rpts[8], 3.6400862)
def setUp(self):
    """Load the four test structures onto ``self`` (ctf, ubq, r69, icb)."""
    fixtures = (
        ('ctf', '1ctf.pdb'),
        ('ubq', '1ubq.pdb'),
        ('r69', '1r69.pdb'),
        ('icb', '4icb.pdb'),
    )
    for attribute, pdb_name in fixtures:
        structure = ampal.load_pdb(str(TEST_FILES_PATH / pdb_name))
        setattr(self, attribute, structure)
def headless_destress(pdb_file: str) -> DesignMetricsOutputRow:
    """Run the DE-STRESS metrics on a single PDB file in headless (CLI) mode.

    The file is loaded into an AMPAL assembly, written back out as PDB text
    and reduced to its ``ATOM`` records (some of the other records can cause
    issues for some of the DE-STRESS metrics). The filtered text is then
    passed to ``analysis.create_metrics_from_pdb``.

    Parameters
    ----------
    pdb_file: str
        Path of the input PDB file.

    Returns
    -------
    design_metrics_output_row: DesignMetricsOutputRow
        The design name (file name without extension), the file name and
        every DE-STRESS metric. If the metric calculation raises
        ``KeyError`` or ``ValueError`` all metric fields are ``None``.
    """
    # First printing out the pdb file path
    print(pdb_file)

    # Loading in the PDB file and converting it to an ampal assembly
    ampal_assembly = ampal.load_pdb(str(pdb_file), path=True)

    # Only selecting ATOM residues and removing the other residues.
    # This is because some of these other residues can cause issues
    # for the DE-STRESS metric calculations.
    pdb_lines_filtered = [
        line for line in ampal_assembly.pdb.splitlines()
        if line.startswith("ATOM")
    ]
    pdb_string_filtered = "\n".join(pdb_lines_filtered)

    # Extracting the design name for the pdb file and the file name
    design_name = os.path.splitext(os.path.basename(pdb_file))[0]
    file_name = pdb_file

    # Residue codes of the composition metrics, in output order.
    composition_codes = [
        "ALA", "CYS", "ASP", "GLU", "PHE", "GLY", "HIS",
        "ILE", "LYS", "LEU", "MET", "ASN", "PRO", "GLN",
        "ARG", "SER", "THR", "VAL", "TRP", "UNK", "TYR",
    ]

    # Defining a list of fields; the order must match the values built below.
    design_field_list = ["composition_" + code for code in composition_codes]
    design_field_list += [
        "hydrophobic_fitness",
        "isoelectric_point",
        "mass",
        "num_residues",
        "packing_density",
        "budeff_total",
        "budeff_steric",
        "budeff_desolvation",
        "budeff_charge",
        "evoef2_total",
        "evoef2_ref_total",
        "evoef2_intraR_total",
        "evoef2_interS_total",
        "evoef2_interD_total",
        "dfire2_total",
        "rosetta_total",
        "rosetta_fa_atr",
        "rosetta_fa_rep",
        "rosetta_fa_intra_rep",
        "rosetta_fa_elec",
        "rosetta_fa_sol",
        "rosetta_lk_ball_wtd",
        "rosetta_fa_intra_sol_xover4",
        "rosetta_hbond_lr_bb",
        "rosetta_hbond_sr_bb",
        "rosetta_hbond_bb_sc",
        "rosetta_hbond_sc",
        "rosetta_dslf_fa13",
        "rosetta_rama_prepro",
        "rosetta_p_aa_pp",
        "rosetta_fa_dun",
        "rosetta_omega",
        "rosetta_pro_close",
        "rosetta_yhh_planarity",
        "aggrescan3d_total_value",
        "aggrescan3d_avg_value",
        "aggrescan3d_min_value",
        "aggrescan3d_max_value",
    ]

    try:
        # Running the DE-STRESS metrics for the pdb file
        design_metrics = analysis.create_metrics_from_pdb(pdb_string_filtered)

        # Unpacking the composition metrics
        comp_metrics = unpacking_comp_metrics(design_metrics)

        # Metric values in the same order as design_field_list.
        design_values = [comp_metrics[code] for code in composition_codes]
        design_values += [
            design_metrics.hydrophobic_fitness,
            design_metrics.isoelectric_point,
            design_metrics.mass,
            design_metrics.num_of_residues,
            design_metrics.packing_density,
            design_metrics.budeFF_results.total_energy,
            design_metrics.budeFF_results.steric,
            design_metrics.budeFF_results.desolvation,
            design_metrics.budeFF_results.charge,
            design_metrics.evoEF2_results.total,
            design_metrics.evoEF2_results.ref_total,
            design_metrics.evoEF2_results.intraR_total,
            design_metrics.evoEF2_results.interS_total,
            design_metrics.evoEF2_results.interD_total,
            design_metrics.dfire2_results.total,
            design_metrics.rosetta_results.total_score,
            design_metrics.rosetta_results.fa_atr,
            design_metrics.rosetta_results.fa_rep,
            design_metrics.rosetta_results.fa_intra_rep,
            design_metrics.rosetta_results.fa_elec,
            design_metrics.rosetta_results.fa_sol,
            design_metrics.rosetta_results.lk_ball_wtd,
            design_metrics.rosetta_results.fa_intra_sol_xover4,
            design_metrics.rosetta_results.hbond_lr_bb,
            design_metrics.rosetta_results.hbond_sr_bb,
            design_metrics.rosetta_results.hbond_bb_sc,
            design_metrics.rosetta_results.hbond_sc,
            design_metrics.rosetta_results.dslf_fa13,
            design_metrics.rosetta_results.rama_prepro,
            design_metrics.rosetta_results.p_aa_pp,
            design_metrics.rosetta_results.fa_dun,
            design_metrics.rosetta_results.omega,
            design_metrics.rosetta_results.pro_close,
            design_metrics.rosetta_results.yhh_planarity,
            design_metrics.aggrescan3d_results.total_value,
            design_metrics.aggrescan3d_results.avg_value,
            design_metrics.aggrescan3d_results.min_value,
            design_metrics.aggrescan3d_results.max_value,
        ]

        # Creating a dictionary of all the design metrics
        design_metrics_output = dict(zip(design_field_list, design_values))

    except (KeyError, ValueError):
        # Setting all the design metrics to None
        design_metrics_output = dict(
            zip(design_field_list, [None] * len(design_field_list))
        )

    # Creating the design metrics output row
    design_metrics_output_row = DesignMetricsOutputRow(
        design_name=design_name,
        file_name=file_name,
        **design_metrics_output,
    )

    return design_metrics_output_row
def headless_destress1(input_path: str) -> None:
    """Compute DE-STRESS metrics for every ``*.pdb`` file in a directory.

    The working directory is changed to ``input_path``. Each PDB file found
    there is loaded with AMPAL, reduced to its ``ATOM`` records and analysed
    with ``analysis.create_metrics_from_pdb``. One row per file is written
    to ``design_data.csv`` in that directory, and the elapsed time is
    printed at the end.

    Parameters
    ----------
    input_path: str
        Directory containing the PDB files to analyse.
    """
    tic = time.time()

    # Resolving the input path that has been provided
    input_path = Path(input_path).resolve()

    # Changing directory to the input path
    os.chdir(input_path)

    # Getting a list of all the pdb files in the input path
    pdb_file_list = list(input_path.glob("*.pdb"))

    # Defining a list of headers for the csv file output
    headers = [
        "design name",
        "file name",
        "composition: ALA",
        "composition: CYS",
        "composition: ASP",
        "composition: GLU",
        "composition: PHE",
        "composition: GLY",
        "composition: HIS",
        "composition: ILE",
        "composition: LYS",
        "composition: LEU",
        "composition: MET",
        "composition: ASN",
        "composition: PRO",
        "composition: GLN",
        "composition: ARG",
        "composition: SER",
        "composition: THR",
        "composition: VAL",
        "composition: TRP",
        "composition: UNK",
        "composition: TYR",
        "hydrophobic fitness",
        "isoelectric point (pH)",
        "mass (da)",
        "number of residues",
        "packing density",
        "budeff: total",
        "budeff: steric",
        "budeff: desolvation",
        "budeff: charge",
        "evoef2: total",
        "evoef2: ref total",
        "evoef2: intraR total",
        "evoef2: interS total",
        "evoef2 - interD total",
        "dfire2 - total",
        "rosetta - total",
        "rosetta - fa_atr",
        "rosetta - fa_rep",
        "rosetta - fa_intra_rep",
        "rosetta - fa_elec",
        "rosetta - fa_sol",
        "rosetta - lk_ball_wtd",
        "rosetta - fa_intra_sol_xover4",
        "rosetta - hbond_lr_bb",
        "rosetta - hbond_sr_bb",
        "rosetta - hbond_bb_sc",
        "rosetta - hbond_sc",
        "rosetta - dslf_fa13",
        "rosetta - rama_prepro",
        "rosetta - p_aa_pp",
        "rosetta - fa_dun",
        "rosetta - omega",
        "rosetta - pro_close",
        "rosetta - yhh_planarity",
        "aggrescan3d: total_value",
        "aggrescan3d: avg_value",
        "aggrescan3d: min_value",
        "aggrescan3d: max_value",
    ]

    # Creating a data list to collect the results from each PDB file
    data_list = []

    # Looping through each PDB file and calculating the DE-STRESS design metrics
    for pdb_file in pdb_file_list:
        print(pdb_file)
        ampal_assembly = ampal.load_pdb(str(pdb_file), path=True)

        # Only selecting ATOM residues
        pdb_lines_filtered = [
            line for line in ampal_assembly.pdb.splitlines()
            if line.startswith("ATOM")
        ]
        pdb_string_filtered = "\n".join(pdb_lines_filtered)

        # Extracting all the values for inserting into the csv file
        design_name = os.path.splitext(os.path.basename(pdb_file))[0]
        file_name = pdb_file

        try:
            design_metrics = analysis.create_metrics_from_pdb(pdb_string_filtered)

            # Unpacking the composition metrics
            comp_metrics = unpacking_comp_metrics(design_metrics)

            data_row = [
                design_name,
                file_name,
                comp_metrics["ALA"],
                comp_metrics["CYS"],
                comp_metrics["ASP"],
                comp_metrics["GLU"],
                comp_metrics["PHE"],
                comp_metrics["GLY"],
                comp_metrics["HIS"],
                comp_metrics["ILE"],
                comp_metrics["LYS"],
                comp_metrics["LEU"],
                comp_metrics["MET"],
                comp_metrics["ASN"],
                comp_metrics["PRO"],
                comp_metrics["GLN"],
                comp_metrics["ARG"],
                comp_metrics["SER"],
                comp_metrics["THR"],
                comp_metrics["VAL"],
                comp_metrics["TRP"],
                comp_metrics["UNK"],
                comp_metrics["TYR"],
                design_metrics.hydrophobic_fitness,
                design_metrics.isoelectric_point,
                design_metrics.mass,
                design_metrics.num_of_residues,
                design_metrics.packing_density,
                design_metrics.budeFF_results.total_energy,
                design_metrics.budeFF_results.steric,
                design_metrics.budeFF_results.desolvation,
                design_metrics.budeFF_results.charge,
                design_metrics.evoEF2_results.total,
                design_metrics.evoEF2_results.ref_total,
                design_metrics.evoEF2_results.intraR_total,
                design_metrics.evoEF2_results.interS_total,
                design_metrics.evoEF2_results.interD_total,
                design_metrics.dfire2_results.total,
                design_metrics.rosetta_results.total_score,
                design_metrics.rosetta_results.fa_atr,
                design_metrics.rosetta_results.fa_rep,
                design_metrics.rosetta_results.fa_intra_rep,
                design_metrics.rosetta_results.fa_elec,
                design_metrics.rosetta_results.fa_sol,
                design_metrics.rosetta_results.lk_ball_wtd,
                design_metrics.rosetta_results.fa_intra_sol_xover4,
                design_metrics.rosetta_results.hbond_lr_bb,
                design_metrics.rosetta_results.hbond_sr_bb,
                design_metrics.rosetta_results.hbond_bb_sc,
                design_metrics.rosetta_results.hbond_sc,
                design_metrics.rosetta_results.dslf_fa13,
                design_metrics.rosetta_results.rama_prepro,
                design_metrics.rosetta_results.p_aa_pp,
                design_metrics.rosetta_results.fa_dun,
                design_metrics.rosetta_results.omega,
                design_metrics.rosetta_results.pro_close,
                design_metrics.rosetta_results.yhh_planarity,
                design_metrics.aggrescan3d_results.total_value,
                design_metrics.aggrescan3d_results.avg_value,
                design_metrics.aggrescan3d_results.min_value,
                design_metrics.aggrescan3d_results.max_value,
            ]

        except (KeyError, ValueError):
            # A failed design still gets exactly ONE row as wide as the
            # header: its identifying columns followed by empty metrics.
            # (Appending the bare name as its own row made csv write one
            # character per column, followed by a second, all-empty row.)
            data_row = [design_name, file_name] + [None] * (len(headers) - 2)

        data_list.append(data_row)

    # Opening csv to insert into. newline="" is what the csv module requires
    # so that no extra blank lines appear on platforms that translate "\n".
    with open("design_data.csv", "w", encoding="UTF8", newline="") as f:
        writer = csv.writer(f)

        # Writing the header
        writer.writerow(headers)

        # write the data
        writer.writerows(data_list)

    toc = time.time()
    print("Done in {:.4f} seconds".format(toc - tic))
def main(argv=None):
    """Command-line entry point: parse the options and run the mutagenesis scan.

    The function checks the running Python version against
    ``config.required_python`` and builds an argument parser with the
    ``full`` and ``manual`` sub-commands. It then copies the verbosity,
    plotting and rotamer-correction options into ``config``, verifies that
    the required executables and the PDB file exist, loads the PDB file
    with AMPAL and calls ``init_directories`` and ``start_mutagenesis``.

    Parameters
    ----------
    argv : list of str, optional
        Extra arguments. When given they are appended to ``sys.argv``, which
        is what the parser actually reads.

    Returns
    -------
    int
        0 on success or keyboard interrupt, 2 on any failure.

    Notes
    -----
    A PDB file that AMPAL cannot load terminates the process through
    ``sys.exit(2)`` rather than returning.
    """
    # Explicit comparison instead of ``assert`` so that the version check is
    # still performed when Python runs with -O (which strips assertions).
    if sys.version_info < config.required_python:
        sys.stderr.write("\nFATAL ERROR: Wrong Python Version:\n\n"
                         "We shall NOT continue.\nWe need python {}.{} or greater"
                         " for using the budeAlaScan.\n".format(config.required_python[0],
                                                                 config.required_python[1])
                         )
        sys.stderr.write("\nWe found this Python Version:\n")
        sys.stderr.write(sys.version)
        sys.stderr.write("\n")
        return 2

    if argv is None:
        argv = sys.argv
    else:
        sys.argv.extend(argv)

    program_name = os.path.basename(sys.argv[0])
    program_version = "v%s" % __version__
    program_build_date = str(__updated__)
    program_version_message = '%%(prog)s %s (%s)' % (program_version, program_build_date)
    # Lines 1-20 of the __main__ module docstring form the long description.
    program_fulldesc = __import__('__main__').__doc__.split("\n")
    program_longdesc = '\n'.join(program_fulldesc[1:21])
    program_license = '''%s

  Created by Amaurys Ávila Ibarra on %s.
  Copyright 2019 University of Bristol. All rights reserved.

  Licensed under the Apache License 2.0
  http://www.apache.org/licenses/LICENSE-2.0

  Distributed on an "AS IS" basis without warranties
  or conditions of any kind, either express or implied.

USAGE

    %s [%s] [options]
''' % (program_longdesc, str(__date__), program_name, __sub_commands__)

    full_desc = """
    This mode will do a full saturation mutagenesis.

    In this mode all residues in the sequence will be mutated to these default residues:
    [{}]

    If the option to disable rotamer correction is given,
    the option to activate residues for rotamer correction will be ignored.
    """.format(", ".join(config.mutate_res))

    manual_desc = """
    This mode will do mutagenesis using the given residues.

    In this mode the mutagenesis will be done by mutating every residue
    in the sequence to the list of residues given in the command line.

    Resiudes given in the command line MUST be in this list.
    [{}]

    If the option to disable rotamer correction is given,
    the option to activate residues for rotamer correction will be ignored.
    """.format(", ".join(config.legal_aa))

    try:
        # Setup argument parser
        parser = argparse.ArgumentParser(description=program_license,
                                         formatter_class=argparse.RawDescriptionHelpFormatter)
        parser.add_argument('-V', '--version', action='version', version=program_version_message)
        parser.add_argument('-v', '--verbose', dest="verbose_g", action='store_true',
                            help="Output information during execution [default: %(default)s].")
        parser.add_argument('-t', '--turn-plot-off', dest="no_plot_g", action='store_false',
                            help="Do not show plots [default: %(default)s].")
        parser.add_argument('-i', '--disable-rotamer-correction', dest="no_rot_g", action='store_true',
                            help="No residues will be activated for rotamer correction [default: %(default)s].")

        subparsers = parser.add_subparsers(title="Mutagenesis sub-commands",
                                           description="Mutagenesis modes.",
                                           help="What mutagenesis mode to do.")

        # Full Mode
        parser_full = subparsers.add_parser('full', description=full_desc,
                                            help="Do mutagenesis with default list of mutable residues.",
                                            formatter_class=argparse.RawDescriptionHelpFormatter)
        parser_full.add_argument(dest="scan_mode", help=argparse.SUPPRESS, nargs='?', const=True,
                                 default='full')
        parser_full.add_argument('-v', '--verbose', dest="verbose", action='store_true',
                                 help="Output information during execution [default: %(default)s].")
        parser_full.add_argument('-t', '--turn-plot-off', dest="no_plot", action='store_false',
                                 help="Do not show plots [default: %(default)s].")

        # Mandatory options for full mode.
        mandatory_full = parser_full.add_argument_group("Mandatory Arguments")
        mandatory_full.add_argument("-p", "--pdb-file", dest="pdb_file_name",
                                    help="Name of the PDB file.", metavar="myPDB.pdb", required=True)
        mandatory_full.add_argument("-l", "--ligand-chains", dest="lig_chains",
                                    help="Ligand chain(s) to do the mutagenesis on.", metavar="A",
                                    type=str, nargs='+', required=True)

        # Default options for full mode.
        default_full = parser_full.add_argument_group("Default Arguments")
        default_full.add_argument("-a", "--residues-to-activate", dest="res2activate",
                                  help="Residues to activate for rotamer correction. [default: %(default)s].",
                                  metavar="D E", type=str, nargs='+', default="DERKH")
        default_full.add_argument('-i', '--disable-rotamer-correction', dest="no_rot_m",
                                  action='store_true',
                                  help="No residues will be activated for rotamer correction [default: %(default)s].")

        # manual selection
        parser_manual = subparsers.add_parser('manual', description=manual_desc,
                                              help="Do mutagenesis with the given resisues, [F M I L Y W].",
                                              formatter_class=argparse.RawDescriptionHelpFormatter)
        parser_manual.add_argument(dest="scan_mode", help=argparse.SUPPRESS, nargs='?', const=True,
                                   default='manual')
        parser_manual.add_argument('-v', '--verbose', dest="verbose", action='store_true',
                                   help="Output information during execution [default: %(default)s].")
        parser_manual.add_argument('-t', '--turn-plot-off', dest="no_plot", action='store_false',
                                   help="Do not show plots [default: %(default)s].")

        # Mandatory manual options.
        mandatory_manual = parser_manual.add_argument_group("Mandatory Arguments")
        mandatory_manual.add_argument("-p", "--pdb-file", dest="pdb_file_name",
                                      help="Name of the PDB file.", metavar="myPDB.pdb", required=True)
        mandatory_manual.add_argument("-l", "--ligand-chains", dest="lig_chains",
                                      help="Ligand chain(s) to do the mutagenesis on.", metavar="A",
                                      type=str, nargs='+', required=True)
        mandatory_manual.add_argument("-m", "--residues-for-mutagenesis", dest="mut_resisues",
                                      help="One letter code of residues to mutate to.", metavar="W",
                                      type=str, nargs='+', required=True)

        # Default options for manual mode.
        default_manual = parser_manual.add_argument_group("Default Arguments")
        default_manual.add_argument("-a", "--residues-to-activate", dest="res2activate",
                                    help="Residues to activate for rotamer correction. [default: %(default)s].",
                                    metavar="D E", type=str, nargs='+', default="DERKH")
        default_manual.add_argument('-i', '--disable-rotamer-correction', dest="no_rot_m",
                                    action='store_true',
                                    help="No residues will be activated for rotamer correction [default: %(default)s].")

        # Process arguments
        args = parser.parse_args()

        # The last four characters are dropped as the file extension.
        config.pdb_basename = os.path.basename(args.pdb_file_name)[:-4]

        # Each option exists both globally and per sub-command; either one
        # being set switches the behaviour.
        if args.verbose or args.verbose_g:
            config.verbose = True

        if args.no_rot_g or args.no_rot_m:
            config.do_rotamer_correction = False

        # The plot flags are store_false, so a False value means "-t" was given.
        if not args.no_plot or not args.no_plot_g:
            config.showplots = False

        if config.verbose:
            print("{} started for PDB ID {} on: {}".format(program_name, config.pdb_basename,
                                                           strftime(config.date_fmt, localtime())))
            if config.do_rotamer_correction:
                print("Rotamer correction is active.")
            else:
                print("Rotamer correction is NOT active.")

        if not check_executables():
            print("FATAL ERROR: Executable(s) not found.", file=sys.stderr)
            return 2

        if not os.path.isfile(args.pdb_file_name):
            print("FATAL ERROR: The PDB file [{}] cannot be found.".format(args.pdb_file_name),
                  file=sys.stderr)
            return 2

        try:
            my_ampal = ampal.load_pdb(args.pdb_file_name)
        except Exception as e:
            sys.stderr.write(
                "\nFatal Error: '{}' contains non standard amino acids.\n".format(args.pdb_file_name))
            sys.stderr.write("Error Info:\n{}\n".format(repr(e)))
            sys.exit(2)

        config.is_multimodel = is_multi_model(my_ampal)

        init_directories()

        start_mutagenesis(my_ampal, args)

        if config.verbose:
            print("{} Finished on: ".format(program_name), strftime(config.date_fmt, localtime()))

    except KeyboardInterrupt:
        ### handle keyboard interrupt ###
        return 0
    except Exception as e:
        indent = len(program_name) * " "
        sys.stderr.write(program_name + ": " + repr(e) + "\n")
        sys.stderr.write(indent + " for help use --help\n\n")
        return 2

    return 0
def setUp(self):
    """Load ``3qy1.pdb`` into ``self.structure`` and tag it with DSSP data."""
    structure = load_pdb(str(TEST_FILE_FOLDER / '3qy1.pdb'))
    self.structure = structure
    tag_dssp_data(structure)
print(subdirs) # Printing how many directories the script will run for print("Fixing the pdb files for " + str(len(subdirs)) + " directories") # 2. Looping through the sub directories and fixing the pdb files---------- # Looping through all the sub directories for subdir in subdirs: # Extracting the name of the subdir subdir_name = os.path.basename(subdir) print("Subdir: " + subdir_name) # Loading the native structure native_structure = ampal.load_pdb(subdir + "/" + "native.pdb") # Deleting hydrogen atoms for residue in native_structure.get_monomers(): del_keys = [] for (k, v) in residue.atoms.items(): if v.element == "H": del_keys.append(k) for k in del_keys: del residue.atoms[k] # Renaming the native pdb file to include the pdb id and saving it with open(subdir + "/" + subdir_name + "_native.pdb", "w") as outf: outf.write(native_structure.pdb) # Extracting all the file names of the decoy structures