Example #1
    def test_pacc_parallel_antiparallel(self):
        file_name = os.path.join(self.test_cc_folder, 'p3_p4.pdb')
        cc = ampal.load_pdb(file_name)
        cca = pacc.PACCAnalysis(cc)
        radius = np.mean(cca.radii_layers)
        assert radius < 5.1

        file_name = os.path.join(self.test_cc_folder, 'APH.pdb')
        cc = ampal.load_pdb(file_name)
        cca = pacc.PACCAnalysis(cc)
        radius = np.mean(cca.radii_layers)
        assert radius < 5.1
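
The same analysis can be run on any coiled-coil structure outside the test harness. A minimal sketch; the file name is a placeholder and `pacc` is assumed to be importable exactly as it is used in the test module above:

import numpy as np
import ampal
# `pacc` is assumed to be available as in the tests above (its import is not shown here).

cc = ampal.load_pdb("my_coiled_coil.pdb")  # placeholder file name
cca = pacc.PACCAnalysis(cc)
print(np.mean(cca.radii_layers))  # mean superhelical radius over the layers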
Example #2
    def pdb_check(self, pdb_file_path):
        with open(pdb_file_path, 'r') as inf:
            pdb_file = inf.read()
        structure = ampal.load_pdb(pdb_file_path)

        # Compare the number of lines in the output ampal pdb with the original
        pdb_lines = [
            x for x in pdb_file.splitlines()
            if (x.startswith('ATOM') or x.startswith('HETATM'))
        ]
        ampal_pdb_lines = [
            x for x in structure.make_pdb(ligands=True,
                                          alt_states=True).splitlines()
            if (x.startswith('ATOM') or x.startswith('HETATM'))
        ]
        self.assertEqual(len(pdb_lines), len(ampal_pdb_lines))

        # Compare the atomic composition
        pdb_atomic = Counter([x[-4:-2].strip() for x in pdb_lines])
        ampal_atomic = Counter([x[-4:-2].strip() for x in ampal_pdb_lines])
        self.assertEqual(pdb_atomic, ampal_atomic)

        # Compare the residue composition
        pdb_residues = Counter([x[17:20].strip() for x in pdb_lines])
        ampal_residues = Counter([x[17:20].strip() for x in ampal_pdb_lines])
        self.assertEqual(pdb_residues, ampal_residues)
        return
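
The same round-trip check can be performed outside a test class; a minimal sketch, with a placeholder file path:

from collections import Counter

import ampal

pdb_path = "example.pdb"  # placeholder path
with open(pdb_path, "r") as inf:
    original_lines = [l for l in inf.read().splitlines()
                      if l.startswith(("ATOM", "HETATM"))]

structure = ampal.load_pdb(pdb_path)
rebuilt_lines = [l for l in structure.make_pdb(ligands=True, alt_states=True).splitlines()
                 if l.startswith(("ATOM", "HETATM"))]

# The rebuilt PDB should preserve the residue composition of the original.
original_residues = Counter(l[17:20].strip() for l in original_lines)
rebuilt_residues = Counter(l[17:20].strip() for l in rebuilt_lines)
assert original_residues == rebuilt_residues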
Example #3
def create_biounit_entry(
    pdb_path: Path,
    biounit_num: int,
    pdb_entry: PdbModel,
    is_deposited_pdb: bool,
    preferred_biol_unit: tp.Optional[int],
) -> BiolUnitModel:
    with gz.open(str(pdb_path)) as inf:
        contents = inf.read().decode()
    pdb_ampal = ampal.load_pdb(contents, pdb_id=pdb_path.name, path=False)
    is_preferred_biol_unit = (False if preferred_biol_unit is None else
                              biounit_num == preferred_biol_unit)
    biounit_model = BiolUnitModel(
        biol_unit_number=biounit_num,
        is_deposited_pdb=is_deposited_pdb,
        is_preferred_biol_unit=is_preferred_biol_unit,
        pdb=pdb_entry,
    )
    if isinstance(pdb_ampal, ampal.Assembly):
        states = [create_state_entry(pdb_ampal, 0, biounit_model)]
    else:
        states = []
        for i, state in enumerate(pdb_ampal):
            states.append(create_state_entry(state, i, biounit_model))
    return biounit_model
Example #4
def test_cb_atom_filter(residue_number: int):
    assembly = ampal.load_pdb(str(TEST_DATA_DIR / "3qy1.pdb"))
    focus_residue = assembly[0][residue_number]
    backbone_atoms = ("N", "CA", "C", "O", "CB")

    for atom in focus_residue:
        filtered_atom = atom.res_label in backbone_atoms
        filtered_scenario = cfds.keep_sidechain_cb_atom_filter(atom)
        assert filtered_atom == filtered_scenario, f"Expected {atom.res_label} to return {filtered_atom} after filter"
Example #5
def create_metrics_from_pdb(pdb_string: str) -> DesignMetrics:
    ampal_assembly = ampal.load_pdb(pdb_string, path=False)
    # relabel everything to remove annoying insertion codes!
    ampal_assembly.relabel_all()
    if isinstance(ampal_assembly, ampal.AmpalContainer):
        ampal_assembly = ampal_assembly[0]
    if not ampal_assembly._molecules:
        raise ValueError("No PDB format data found in file.")
    design_metrics = analyse_design(ampal_assembly)
    return design_metrics
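
A minimal sketch of calling this on a file from disk; the path is a placeholder and only attributes that appear elsewhere in these examples are accessed:

with open("design.pdb", "r") as inf:  # placeholder path
    pdb_string = inf.read()

design_metrics = create_metrics_from_pdb(pdb_string)
print(design_metrics.num_of_residues, design_metrics.mass)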
Example #6
 def setUp(self):
     test_files = [
         str(TEST_FILE_FOLDER / x)
         for x in ['1ek9.pdb', '2ht0.pdb', '3qy1.pdb']
     ]
     test_structures = [ampal.load_pdb(x) for x in test_files]
     self.test_polypeptides = [
         p for p in itertools.chain(*test_structures)
         if isinstance(p, ampal.Polypeptide)
     ]
Example #7
 def setUp(self):
     self.cis_tas = [[-179, 120, -40], [0, -60, 20]]
     self.cis_dipeptide = isambard.specifications.TAPolypeptide(
         self.cis_tas)
     self.trans_tas = [[0, -60, 20], [-179, 120, -40]]
     self.trans_dipeptide = isambard.specifications.TAPolypeptide(
         self.trans_tas)
     test_file = str(TEST_FILES_PATH / '1ek9.pdb')
     test_structure = ampal.load_pdb(test_file)
     self.test_polypeptides = [
         p for p in test_structure if isinstance(p, ampal.Polypeptide)
     ]
Example #8
def test_create_residue_frame_backbone_only(residue_number):
    assembly = ampal.load_pdb(str(TEST_DATA_DIR / "3qy1.pdb"))
    focus_residue = assembly[0][residue_number]

    # Make sure that the residue's peptide plane is correctly aligned with the XY plane
    cfds.align_to_residue_plane(focus_residue)
    assert np.array_equal(focus_residue["CA"].array, (
        0,
        0,
        0,
    )), "The CA atom should lie on the origin."
    assert np.isclose(focus_residue["N"].x,
                      0), "The nitrogen atom should lie on XY."
    assert np.isclose(focus_residue["N"].z,
                      0), "The nitrogen atom should lie on XY."
    assert np.isclose(focus_residue["C"].z,
                      0), "The carbon atom should lie on XY."
    # Make sure that all relevant atoms are pulled into the frame
    frame_edge_length = 12.0
    voxels_per_side = 21
    centre = voxels_per_side // 2
    max_dist = np.sqrt(((frame_edge_length / 2)**2) * 3)
    for atom in (a for a in assembly.get_atoms(ligands=False)
                 if cfds.within_frame(frame_edge_length, a)):
        assert g.distance(atom, (0, 0, 0)) <= max_dist, (
            "All atoms filtered by `within_frame` should be within "
            "`frame_edge_length/2` of the origin")

    # Make sure that aligned residue sits on XY after it is discretized
    single_res_assembly = ampal.Assembly(molecules=ampal.Polypeptide(
        monomers=copy.deepcopy(focus_residue).backbone))
    # Need to reassign the parent so that the residue is the only thing in the assembly
    single_res_assembly[0].parent = single_res_assembly
    single_res_assembly[0][0].parent = single_res_assembly[0]
    # Obtain atom encoder:
    codec = cfds.Codec.CNO()
    array = cfds.create_residue_frame(single_res_assembly[0][0],
                                      frame_edge_length,
                                      voxels_per_side,
                                      encode_cb=False,
                                      codec=codec)
    np.testing.assert_array_equal(array[centre, centre, centre],
                                  [True, False, False],
                                  err_msg="The central atom should be CA.")
    nonzero_indices = list(zip(*np.nonzero(array)))
    assert (len(nonzero_indices) == 4
            ), "There should be only 4 backbone atoms in this frame"
    nonzero_on_xy_indices = list(zip(*np.nonzero(array[:, :, centre])))
    assert (3 <= len(nonzero_on_xy_indices) <=
            4), "N, CA and C should lie on the xy plane."
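
The `max_dist` bound used above is just half of the body diagonal of the cubic frame; for a 12.0 Å edge:

import numpy as np

frame_edge_length = 12.0
half_edge = frame_edge_length / 2
# An atom kept by `within_frame` lies inside a cube of edge 12.0 centred on the
# CA, so its distance from the CA is at most half the body diagonal.
max_dist = np.sqrt(3 * half_edge ** 2)  # == (edge / 2) * sqrt(3)
print(round(max_dist, 2))  # ~10.39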
Example #9
def pack_side_chains_scwrl(assembly,
                           sequences,
                           rigid_rotamer_model=True,
                           hydrogens=False):
    """Packs side chains onto a protein structure.

    Parameters
    ----------
    assembly : AMPAL Assembly
        AMPAL object containing some protein structure.
    sequences : [str]
        A list of amino acid sequences in single-letter code for Scwrl to pack.
    rigid_rotamer_model : bool, optional
        If True, Scwrl will use the rigid-rotamer model, which is
        faster but less accurate.
    hydrogens : bool, optional
        If False, the hydrogens produced by Scwrl will be omitted.

    Returns
    -------
    packed_structure : AMPAL Assembly
        A new AMPAL Assembly containing the packed structure, with
        the Scwrl score in the tags.
    """
    if not scwrl_available():
        raise ValueError('Scwrl4 is unavailable on your system path.')
    protein = [x for x in assembly if isinstance(x, ampal.Polypeptide)]
    total_seq_len = sum([len(x) for x in sequences])
    total_aa_len = sum([len(x) for x in protein])
    if total_seq_len != total_aa_len:
        raise ValueError('Total sequence length ({}) does not match '
                         'total Polypeptide length ({}).'.format(
                             total_seq_len, total_aa_len))
    if len(protein) != len(sequences):
        raise ValueError('Number of sequences ({}) does not match '
                         'number of Polypeptides ({}).'.format(
                             len(sequences), len(protein)))
    scwrl_std_out, scwrl_pdb = run_scwrl(
        assembly.pdb,
        ''.join(sequences),
        path=False,
        rigid_rotamer_model=rigid_rotamer_model,
        hydrogens=hydrogens)
    packed_structure, scwrl_score = parse_scwrl_out(scwrl_std_out, scwrl_pdb)
    new_assembly = ampal.load_pdb(packed_structure, path=False)
    new_assembly.tags['scwrl_score'] = scwrl_score
    return new_assembly
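
A minimal usage sketch, assuming the backbone file exists and that the sequences match the Polypeptide lengths (see the checks in the function above); here the native sequences are simply repacked:

import ampal

assembly = ampal.load_pdb("design_backbone.pdb")  # placeholder path
sequences = [p.sequence for p in assembly if isinstance(p, ampal.Polypeptide)]
packed = pack_side_chains_scwrl(assembly, sequences, rigid_rotamer_model=True)
print(packed.tags['scwrl_score'])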
Example #10
def test_make_frame_dataset_as_gaussian():
    """Tests the creation of a frame data set."""
    test_file = TEST_DATA_DIR / "1ubq.pdb"
    frame_edge_length = 18.0
    voxels_per_side = 31

    ampal_1ubq = ampal.load_pdb(str(test_file))
    for atom in ampal_1ubq.get_atoms():
        if not cfds.default_atom_filter(atom):
            del atom.parent.atoms[atom.res_label]
            del atom
    with tempfile.TemporaryDirectory() as tmpdir:
        # Obtain atom encoder:
        codec = cfds.Codec.CNO()
        output_file_path = cfds.make_frame_dataset(
            structure_files=[test_file],
            output_folder=tmpdir,
            name="test_dataset",
            frame_edge_length=frame_edge_length,
            voxels_per_side=voxels_per_side,
            verbosity=1,
            require_confirmation=False,
            codec=codec,
            voxels_as_gaussian=True,
        )
        with h5py.File(output_file_path, "r") as dataset:
            for n in range(1, 77):
                # check that the frames match between the arrays generated
                # directly and the ones stored in the HDF5 data set
                residue_number = str(n)
                test_frame = cfds.create_residue_frame(
                    residue=ampal_1ubq["A"][residue_number],
                    frame_edge_length=frame_edge_length,
                    voxels_per_side=voxels_per_side,
                    encode_cb=False,
                    codec=codec,
                    voxels_as_gaussian=True,
                )
                hdf5_array = dataset["1ubq"]["A"][residue_number][()]
                npt.assert_array_equal(
                    hdf5_array,
                    test_frame,
                    err_msg=
                    ("The frame in the HDF5 data set should be the same as the "
                     "input frame."),
                )
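
The HDF5 file written by `make_frame_dataset` can also be browsed directly; a minimal sketch, where the path is assumed to be the one returned above:

import h5py

output_file_path = "test_dataset.hdf5"  # placeholder; use the path returned by make_frame_dataset
with h5py.File(output_file_path, "r") as dataset:
    for pdb_code in dataset:
        for chain_id in dataset[pdb_code]:
            print(pdb_code, chain_id, len(dataset[pdb_code][chain_id]))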
Example #11
 def test_pacc_on_ccs(self):
     """Tests PACC for each file in testing_files/test_ccs"""
     assert len(self.pdb_paths) > 0
     for file in self.pdb_paths:
         cc = ampal.load_pdb(file)
         ccr = ampal.Assembly()
         for h in cc:
             ccr.append(h[1:28])
         cc_pacc = pacc.PACCAnalysis(ccr)
         # Check register assignment
         register, fit = cc_pacc.heptad_register()
         self.check_register(register, cc_pacc.cc_len)
         for p in [
                 cc_pacc.radii_layers, cc_pacc.alpha_layers,
                 cc_pacc.ca_layers
         ]:
             self.check_parameters(
                 cc_pacc.calc_average_parameters(p)[0], cc_pacc.cc_len)
     return
Example #12
    def check_ampal_contents(self, pdb_path):
        """Tests if all atoms present are correctly formatted."""
        with open(pdb_path, 'r') as inf:
            pdb_lines = inf.readlines()
        pdb_atom_numbers = set()
        pdb_monomer_labels = {}
        pdb_has_alt_conf = set()
        for line in pdb_lines:
            record_name = line[:6].strip()
            if (record_name == 'ATOM') or (record_name == 'HETATM'):
                atom_number = int(line[6:11].strip())
                pdb_atom_numbers.add(atom_number)

                monomer_number = int(line[22:26].strip())
                chain = line[21].strip()
                monomer_type = line[17:20].strip()
                pdb_monomer_labels[(chain, monomer_number)] = monomer_type

                alt_loc = line[16].strip()
                if alt_loc:
                    pdb_has_alt_conf.add((chain, monomer_number))

        structure = ampal.load_pdb(pdb_path)
        ampal_atom_numbers = set()
        ampal_has_alt_conf = set()
        for atom in structure.get_atoms(inc_alt_states=True):
            ampal_atom_numbers.add(atom.id)

        ampal_monomer_labels = []
        for monomer in structure.get_monomers():
            ampal_monomer_labels.append(monomer.mol_code)
            if len(monomer.states) > 1:
                ampal_has_alt_conf.add((monomer.parent.id, int(monomer.id)))

        self.assertEqual(pdb_atom_numbers, ampal_atom_numbers)
        self.assertEqual(Counter(pdb_monomer_labels.values()),
                         Counter(ampal_monomer_labels))
        self.assertEqual(pdb_has_alt_conf, ampal_has_alt_conf)
Example #13
 def setUp(self):
     self.ht0 = ampal.load_pdb(str(TEST_FILES_PATH / '2ht0.pdb'))
     self.ubq = ampal.load_pdb(str(TEST_FILES_PATH / '1ubq.pdb'))
     self.pdbs = [self.ht0, self.ubq]
Example #14
import unittest
import pathlib

import ampal
from ampal import analyse_protein


_test_file = pathlib.Path(__file__).parent / 'testing_files' / '2ebo_1.mmol'
_test_polypeptide = ampal.load_pdb(_test_file)[0]


class ResiduesPerTurnTestCase(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        cls.rpts = list(
            analyse_protein.residues_per_turn(p=_test_polypeptide))

    def test_residues_per_turn_length(self):
        self.assertEqual(len(self.rpts), len(_test_polypeptide.primitive))

    def test_residues_per_turn_final_value(self):
        self.assertIsNone(self.rpts[-1])

    def test_residues_per_turn_none_values(self):
        none_values = [x for x in self.rpts if x is None]
        self.assertEqual(len(none_values), 1)

    def test_residues_per_turn_index_eight(self):
        self.assertAlmostEqual(self.rpts[8], 3.6400862)
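
A standalone sketch of the same call, with a placeholder file path; the tests above check that the list has one entry per primitive point and that the final entry is None:

import ampal
from ampal import analyse_protein

polypeptide = ampal.load_pdb('2ebo_1.mmol')[0]  # placeholder path
rpts = list(analyse_protein.residues_per_turn(p=polypeptide))
print(len(rpts), rpts[-1])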

Example #15
 def setUp(self):
     self.ctf = ampal.load_pdb(str(TEST_FILES_PATH / '1ctf.pdb'))
     self.ubq = ampal.load_pdb(str(TEST_FILES_PATH / '1ubq.pdb'))
     self.r69 = ampal.load_pdb(str(TEST_FILES_PATH / '1r69.pdb'))
     self.icb = ampal.load_pdb(str(TEST_FILES_PATH / '4icb.pdb'))
Example #16
def headless_destress(pdb_file: str) -> DesignMetricsOutputRow:
    """Runs DE-STRESS in headless mode (via the CLI rather than the
    DE-STRESS user interface) for a single PDB file.

    The PDB file is converted to an AMPAL assembly object and only the
    ATOM records are kept, as some of the other record types can cause
    issues for some of the DE-STRESS metric calculations. The calculated
    DE-STRESS metrics are returned as a DesignMetricsOutputRow.

    Parameters
    ----------
    pdb_file : str
        Path to the input PDB file.

    Returns
    -------
    design_metrics_output_row : DesignMetricsOutputRow
        The DE-STRESS metrics calculated for the input PDB file.
    """

    # First printing out the pdb file path
    print(pdb_file)

    # Loading in the PDB file and converting it to an ampal assembly
    ampal_assembly = ampal.load_pdb(str(pdb_file), path=True)

    # Keep only ATOM records, as some of the other record types can
    # cause issues for the DE-STRESS metric calculations.
    pdb_lines = ampal_assembly.pdb.splitlines()
    pdb_lines_filtered = [line for line in pdb_lines if line.startswith("ATOM")]
    pdb_string_filtered = "\n".join(pdb_lines_filtered)

    # Extracting the design name for the pdb file and the file name
    design_name = os.path.splitext(os.path.basename(pdb_file))[0]
    file_name = pdb_file

    # Defining a list of fields
    design_field_list = [
        "composition_ALA",
        "composition_CYS",
        "composition_ASP",
        "composition_GLU",
        "composition_PHE",
        "composition_GLY",
        "composition_HIS",
        "composition_ILE",
        "composition_LYS",
        "composition_LEU",
        "composition_MET",
        "composition_ASN",
        "composition_PRO",
        "composition_GLN",
        "composition_ARG",
        "composition_SER",
        "composition_THR",
        "composition_VAL",
        "composition_TRP",
        "composition_UNK",
        "composition_TYR",
        "hydrophobic_fitness",
        "isoelectric_point",
        "mass",
        "num_residues",
        "packing_density",
        "budeff_total",
        "budeff_steric",
        "budeff_desolvation",
        "budeff_charge",
        "evoef2_total",
        "evoef2_ref_total",
        "evoef2_intraR_total",
        "evoef2_interS_total",
        "evoef2_interD_total",
        "dfire2_total",
        "rosetta_total",
        "rosetta_fa_atr",
        "rosetta_fa_rep",
        "rosetta_fa_intra_rep",
        "rosetta_fa_elec",
        "rosetta_fa_sol",
        "rosetta_lk_ball_wtd",
        "rosetta_fa_intra_sol_xover4",
        "rosetta_hbond_lr_bb",
        "rosetta_hbond_sr_bb",
        "rosetta_hbond_bb_sc",
        "rosetta_hbond_sc",
        "rosetta_dslf_fa13",
        "rosetta_rama_prepro",
        "rosetta_p_aa_pp",
        "rosetta_fa_dun",
        "rosetta_omega",
        "rosetta_pro_close",
        "rosetta_yhh_planarity",
        "aggrescan3d_total_value",
        "aggrescan3d_avg_value",
        "aggrescan3d_min_value",
        "aggrescan3d_max_value",
    ]

    try:

        # Running the DE-STRESS metrics for the pdb file
        design_metrics = analysis.create_metrics_from_pdb(pdb_string_filtered)

        # Unpacking the composition metrics
        comp_metrics = unpacking_comp_metrics(design_metrics)

        # Creating a dictionary of all the design metrics
        design_metrics_output = dict(
            zip(
                design_field_list,
                [
                    comp_metrics["ALA"],
                    comp_metrics["CYS"],
                    comp_metrics["ASP"],
                    comp_metrics["GLU"],
                    comp_metrics["PHE"],
                    comp_metrics["GLY"],
                    comp_metrics["HIS"],
                    comp_metrics["ILE"],
                    comp_metrics["LYS"],
                    comp_metrics["LEU"],
                    comp_metrics["MET"],
                    comp_metrics["ASN"],
                    comp_metrics["PRO"],
                    comp_metrics["GLN"],
                    comp_metrics["ARG"],
                    comp_metrics["SER"],
                    comp_metrics["THR"],
                    comp_metrics["VAL"],
                    comp_metrics["TRP"],
                    comp_metrics["UNK"],
                    comp_metrics["TYR"],
                    design_metrics.hydrophobic_fitness,
                    design_metrics.isoelectric_point,
                    design_metrics.mass,
                    design_metrics.num_of_residues,
                    design_metrics.packing_density,
                    design_metrics.budeFF_results.total_energy,
                    design_metrics.budeFF_results.steric,
                    design_metrics.budeFF_results.desolvation,
                    design_metrics.budeFF_results.charge,
                    design_metrics.evoEF2_results.total,
                    design_metrics.evoEF2_results.ref_total,
                    design_metrics.evoEF2_results.intraR_total,
                    design_metrics.evoEF2_results.interS_total,
                    design_metrics.evoEF2_results.interD_total,
                    design_metrics.dfire2_results.total,
                    design_metrics.rosetta_results.total_score,
                    design_metrics.rosetta_results.fa_atr,
                    design_metrics.rosetta_results.fa_rep,
                    design_metrics.rosetta_results.fa_intra_rep,
                    design_metrics.rosetta_results.fa_elec,
                    design_metrics.rosetta_results.fa_sol,
                    design_metrics.rosetta_results.lk_ball_wtd,
                    design_metrics.rosetta_results.fa_intra_sol_xover4,
                    design_metrics.rosetta_results.hbond_lr_bb,
                    design_metrics.rosetta_results.hbond_sr_bb,
                    design_metrics.rosetta_results.hbond_bb_sc,
                    design_metrics.rosetta_results.hbond_sc,
                    design_metrics.rosetta_results.dslf_fa13,
                    design_metrics.rosetta_results.rama_prepro,
                    design_metrics.rosetta_results.p_aa_pp,
                    design_metrics.rosetta_results.fa_dun,
                    design_metrics.rosetta_results.omega,
                    design_metrics.rosetta_results.pro_close,
                    design_metrics.rosetta_results.yhh_planarity,
                    design_metrics.aggrescan3d_results.total_value,
                    design_metrics.aggrescan3d_results.avg_value,
                    design_metrics.aggrescan3d_results.min_value,
                    design_metrics.aggrescan3d_results.max_value,
                ],
            )
        )

    except (KeyError, ValueError):

        # Setting all the design metrics to None
        design_metrics_output = dict(
            zip(design_field_list, [None] * len(design_field_list))
        )

    # Creating the design metrics output row
    design_metrics_output_row = DesignMetricsOutputRow(
        design_name=design_name,
        file_name=file_name,
        **design_metrics_output,
    )

    return design_metrics_output_row
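
A minimal sketch of running it on a single file; the path is a placeholder and DesignMetricsOutputRow is assumed to expose its constructor arguments as attributes:

row = headless_destress("designs/design_01.pdb")  # placeholder path
print(row.design_name, row.file_name)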
Example #17
def headless_destress1(input_path: str) -> None:

    tic = time.time()

    # Resolving the input path that has been provided
    input_path = Path(input_path).resolve()

    # Changing directory to the input path
    os.chdir(input_path)

    # Getting a list of all the pdb files in the input path
    pdb_file_list = list(input_path.glob("*.pdb"))

    # Defining a list of headers for the csv file output
    headers = [
        "design name",
        "file name",
        "composition: ALA",
        "composition: CYS",
        "composition: ASP",
        "composition: GLU",
        "composition: PHE",
        "composition: GLY",
        "composition: HIS",
        "composition: ILE",
        "composition: LYS",
        "composition: LEU",
        "composition: MET",
        "composition: ASN",
        "composition: PRO",
        "composition: GLN",
        "composition: ARG",
        "composition: SER",
        "composition: THR",
        "composition: VAL",
        "composition: TRP",
        "composition: UNK",
        "composition: TYR",
        "hydrophobic fitness",
        "isoelectric point (pH)",
        "mass (da)",
        "number of residues",
        "packing density",
        "budeff: total",
        "budeff: steric",
        "budeff: desolvation",
        "budeff: charge",
        "evoef2: total",
        "evoef2: ref total",
        "evoef2: intraR total",
        "evoef2: interS total",
        "evoef2 - interD total",
        "dfire2 - total",
        "rosetta - total",
        "rosetta - fa_atr",
        "rosetta - fa_rep",
        "rosetta - fa_intra_rep",
        "rosetta - fa_elec",
        "rosetta - fa_sol",
        "rosetta - lk_ball_wtd",
        "rosetta - fa_intra_sol_xover4",
        "rosetta - hbond_lr_bb",
        "rosetta - hbond_sr_bb",
        "rosetta - hbond_bb_sc",
        "rosetta - hbond_sc",
        "rosetta - dslf_fa13",
        "rosetta - rama_prepro",
        "rosetta - p_aa_pp",
        "rosetta - fa_dun",
        "rosetta - omega",
        "rosetta - pro_close",
        "rosetta - yhh_planarity",
        "aggrescan3d: total_value",
        "aggrescan3d: avg_value",
        "aggrescan3d: min_value",
        "aggrescan3d: max_value",
    ]

    # Creating a data list to collect the results from each PDB file
    data_list = []

    # Looping through each PDB file and calculating the DE-STRESS design metrics
    for pdb_file in pdb_file_list:
        print(pdb_file)

        ampal_assembly = ampal.load_pdb(str(pdb_file), path=True)

        # Keep only ATOM records
        pdb_lines = ampal_assembly.pdb.splitlines()
        pdb_lines_filtered = [line for line in pdb_lines if line.startswith("ATOM")]
        pdb_string_filtered = "\n".join(pdb_lines_filtered)

        # Extracting all the values for inserting into the csv file
        design_name = os.path.splitext(os.path.basename(pdb_file))[0]
        file_name = pdb_file

        try:

            design_metrics = analysis.create_metrics_from_pdb(pdb_string_filtered)

            # Unpacking the composition metrics
            comp_metrics = unpacking_comp_metrics(design_metrics)

            data_row = [
                design_name,
                file_name,
                comp_metrics["ALA"],
                comp_metrics["CYS"],
                comp_metrics["ASP"],
                comp_metrics["GLU"],
                comp_metrics["PHE"],
                comp_metrics["GLY"],
                comp_metrics["HIS"],
                comp_metrics["ILE"],
                comp_metrics["LYS"],
                comp_metrics["LEU"],
                comp_metrics["MET"],
                comp_metrics["ASN"],
                comp_metrics["PRO"],
                comp_metrics["GLN"],
                comp_metrics["ARG"],
                comp_metrics["SER"],
                comp_metrics["THR"],
                comp_metrics["VAL"],
                comp_metrics["TRP"],
                comp_metrics["UNK"],
                comp_metrics["TYR"],
                design_metrics.hydrophobic_fitness,
                design_metrics.isoelectric_point,
                design_metrics.mass,
                design_metrics.num_of_residues,
                design_metrics.packing_density,
                design_metrics.budeFF_results.total_energy,
                design_metrics.budeFF_results.steric,
                design_metrics.budeFF_results.desolvation,
                design_metrics.budeFF_results.charge,
                design_metrics.evoEF2_results.total,
                design_metrics.evoEF2_results.ref_total,
                design_metrics.evoEF2_results.intraR_total,
                design_metrics.evoEF2_results.interS_total,
                design_metrics.evoEF2_results.interD_total,
                design_metrics.dfire2_results.total,
                design_metrics.rosetta_results.total_score,
                design_metrics.rosetta_results.fa_atr,
                design_metrics.rosetta_results.fa_rep,
                design_metrics.rosetta_results.fa_intra_rep,
                design_metrics.rosetta_results.fa_elec,
                design_metrics.rosetta_results.fa_sol,
                design_metrics.rosetta_results.lk_ball_wtd,
                design_metrics.rosetta_results.fa_intra_sol_xover4,
                design_metrics.rosetta_results.hbond_lr_bb,
                design_metrics.rosetta_results.hbond_sr_bb,
                design_metrics.rosetta_results.hbond_bb_sc,
                design_metrics.rosetta_results.hbond_sc,
                design_metrics.rosetta_results.dslf_fa13,
                design_metrics.rosetta_results.rama_prepro,
                design_metrics.rosetta_results.p_aa_pp,
                design_metrics.rosetta_results.fa_dun,
                design_metrics.rosetta_results.omega,
                design_metrics.rosetta_results.pro_close,
                design_metrics.rosetta_results.yhh_planarity,
                design_metrics.aggrescan3d_results.total_value,
                design_metrics.aggrescan3d_results.avg_value,
                design_metrics.aggrescan3d_results.min_value,
                design_metrics.aggrescan3d_results.max_value,
            ]

            data_list.append(data_row)

        except (KeyError, ValueError):

            # Record the design and file names, padding the metric columns
            # with None so the row still lines up with the headers.
            data_row = [design_name, file_name] + [None] * (len(headers) - 2)
            data_list.append(data_row)

    # Opening csv to insert into
    with open("design_data.csv", "w", encoding="UTF8") as f:
        writer = csv.writer(f)

        # Writing the header
        writer.writerow(headers)

        # Writing the data rows
        for row in data_list:
            writer.writerow(row)

    toc = time.time()

    print("Done in {:.4f} seconds".format(toc - tic))
Example #18
def main(argv=None):
    try:
        assert sys.version_info >= config.required_python
    except Exception as e:
        sys.stderr.write("\nFATAL ERROR: Wrong Python Version:\n\n"
                         "We shall NOT continue.\nWe need Python {}.{} or greater"
                         " to use budeAlaScan.\n".format(config.required_python[0],
                                                         config.required_python[1]))
        sys.stderr.write("\nWe found this Python Version:\n")
        sys.stderr.write(sys.version)
        sys.stderr.write("\n")
        return 2

    if argv is None:
        argv = sys.argv
    else:
        sys.argv.extend(argv)

    program_name = os.path.basename(sys.argv[0])
    program_version = "v%s" % __version__
    program_build_date = str(__updated__)
    program_version_message = '%%(prog)s %s (%s)' % (program_version, program_build_date)
    program_fulldesc = __import__('__main__').__doc__.split("\n")
    program_longdesc = '\n'.join(program_fulldesc[1:21])

    program_license = '''%s

  Created by Amaurys Ávila Ibarra on %s.
  Copyright 2019 University of Bristol. All rights reserved.

  Licensed under the Apache License 2.0
  http://www.apache.org/licenses/LICENSE-2.0

  Distributed on an "AS IS" basis without warranties
  or conditions of any kind, either express or implied.
   
   
 
USAGE
 
%s [%s] [options]

''' % (program_longdesc, str(__date__), program_name, __sub_commands__)

    full_desc = """
    This mode will do a full saturation mutagenesis.
    
    In this mode all residues in the sequence will be mutated to these default
    residues:
    [{}]
    
    If the option to disable rotamer correction is given, the option to activate
    residues for rotamer correction will be ignored.
""".format(", ".join(config.mutate_res))

    manual_desc = """
    This mode will do mutagenesis using the given residues.
    
    In this mode the mutagenesis will be done by mutating every residue in the
    sequence to the list of residues given in the command line.
    
    Residues given in the command line MUST be in this list.
    [{}]
    
    If the option to disable rotamer correction is given, the option to activate
    residues for rotamer correction will be ignored.
""".format(", ".join(config.legal_aa))
    try:
        # Setup argument parser
        parser = argparse.ArgumentParser(description=program_license, formatter_class=argparse.RawDescriptionHelpFormatter)
        parser.add_argument('-V', '--version', action='version', version=program_version_message)
        parser.add_argument('-v', '--verbose', dest="verbose_g", action='store_true', help="Output information during execution [default: %(default)s].")
        parser.add_argument('-t', '--turn-plot-off', dest="no_plot_g", action='store_false', help="Do not show plots [default: %(default)s].")
        parser.add_argument('-i', '--disable-rotamer-correction', dest="no_rot_g", action='store_true', help="No residues will be activated for rotamer correction [default: %(default)s].")

        subparsers = parser.add_subparsers(title="Mutagenesis sub-commands", description="Mutagenesis modes.", help="What mutagenesis mode to do.")

        # Full Mode
        parser_full = subparsers.add_parser('full', description=full_desc, help="Do mutagenesis with default list of mutable residues.", formatter_class=argparse.RawDescriptionHelpFormatter)
        parser_full.add_argument(dest="scan_mode", help=argparse.SUPPRESS, nargs='?', const=True, default='full')
        parser_full.add_argument('-v', '--verbose', dest="verbose", action='store_true', help="Output information during execution [default: %(default)s].")
        parser_full.add_argument('-t', '--turn-plot-off', dest="no_plot", action='store_false', help="Do not show plots [default: %(default)s].")

        # Mandatory options for full mode.
        mandatory_full = parser_full.add_argument_group("Mandatory Arguments")
        mandatory_full.add_argument("-p", "--pdb-file", dest="pdb_file_name", help="Name of the PDB file.", metavar="myPDB.pdb", required=True)
        mandatory_full.add_argument("-l", "--ligand-chains", dest="lig_chains", help="Ligand chain(s) to do the mutagenesis on.", metavar="A", type=str, nargs='+', required=True)

        # Default options for full mode.
        default_full = parser_full.add_argument_group("Default Arguments")
        default_full.add_argument("-a", "--residues-to-activate", dest="res2activate", help="Residues to activate for rotamer correction. [default: %(default)s].", metavar="D E", type=str, nargs='+', default="DERKH")
        default_full.add_argument('-i', '--disable-rotamer-correction', dest="no_rot_m", action='store_true', help="No residues will be activated for rotamer correction [default: %(default)s].")

        # manual selection
        parser_manual = subparsers.add_parser('manual', description=manual_desc, help="Do mutagenesis with the given residues, [F M I L Y W].", formatter_class=argparse.RawDescriptionHelpFormatter)
        parser_manual.add_argument(dest="scan_mode", help=argparse.SUPPRESS, nargs='?', const=True, default='manual')
        parser_manual.add_argument('-v', '--verbose', dest="verbose", action='store_true', help="Output information during execution [default: %(default)s].")
        parser_manual.add_argument('-t', '--turn-plot-off', dest="no_plot", action='store_false', help="Do not show plots [default: %(default)s].")

        # Mandatory manual options.
        mandatory_manual = parser_manual.add_argument_group("Mandatory Arguments")
        mandatory_manual.add_argument("-p", "--pdb-file", dest="pdb_file_name", help="Name of the PDB file.", metavar="myPDB.pdb", required=True)
        mandatory_manual.add_argument("-l", "--ligand-chains", dest="lig_chains", help="Ligand chain(s) to do the mutagenesis on.", metavar="A", type=str, nargs='+', required=True)
        mandatory_manual.add_argument("-m", "--residues-for-mutagenesis", dest="mut_resisues", help="One letter code of residues to mutate to.", metavar="W", type=str, nargs='+', required=True)

        # Default options for full mode.
        default_manual = parser_manual.add_argument_group("Default Arguments")
        default_manual.add_argument("-a", "--residues-to-activate", dest="res2activate", help="Residues to activate for rotamer correction. [default: %(default)s].", metavar="D E", type=str, nargs='+', default="DERKH")
        default_manual.add_argument('-i', '--disable-rotamer-correction', dest="no_rot_m", action='store_true', help="No residues will be activated for rotamer correction [default: %(default)s].")
        # Process arguments
        args = parser.parse_args()

        config.pdb_basename = os.path.basename(args.pdb_file_name)[:-4]

        if args.verbose or args.verbose_g:
            config.verbose = True

        if args.no_rot_g or args.no_rot_m:
            config.do_rotamer_correction = False

        if not args.no_plot or not args.no_plot_g:
            config.showplots = False

        if config.verbose:
            print("{} started for PDB ID {} on: {}".format(program_name, config.pdb_basename, strftime(config.date_fmt, localtime())))
            if config.do_rotamer_correction:
                print("Rotamer correction is active.")
            else:
                print("Rotamer correction is NOT active.")

        if not check_executables():
            print("FATAL ERROR: Executable(s) not found.", file=sys.stderr)
            return 2
        if not os.path.isfile(args.pdb_file_name):
            print("FATAL ERROR: The PDB file [{}] cannot be found.".format(args.pdb_file_name), file=sys.stderr)
            return 2

        try:
            my_ampal = ampal.load_pdb(args.pdb_file_name)
        except Exception as e:
            sys.stderr.write("\nFatal Error: '{}' contains non standard amino acids.\n".format(args.pdb_file_name))
            sys.stderr.write("Error Info:\n{}\n".format(repr(e)))
            sys.exit(2)

        config.is_multimodel = is_multi_model(my_ampal)
        init_directories()

        start_mutagenesis(my_ampal, args)

        if config.verbose:
            print("{} Finished on: ".format(program_name), strftime(config.date_fmt, localtime()))

    except KeyboardInterrupt:
        ### handle keyboard interrupt ###
        return 0
    except Exception as e:
        indent = len(program_name) * " "
        sys.stderr.write(program_name + ": " + repr(e) + "\n")
        sys.stderr.write(indent + "  for help use --help\n\n")
        return 2
    return 0
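
For reference, a hedged sketch of driving this entry point programmatically; the flags mirror the argparse setup above, while the file name and chain ID are placeholders:

# Equivalent to running the script with: full -p myPDB.pdb -l A
exit_code = main(["full", "-p", "myPDB.pdb", "-l", "A"])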
Example #19
 def setUp(self):
     test_file_path = str(TEST_FILE_FOLDER / '3qy1.pdb')
     self.structure = load_pdb(test_file_path)
     tag_dssp_data(self.structure)
Example #20
print(subdirs)

# Printing how many directories the script will run for
print("Fixing the pdb files for " + str(len(subdirs)) + " directories")

# 2. Looping through the sub directories and fixing the pdb files----------

# Looping through all the sub directories
for subdir in subdirs:

    # Extracting the name of the subdir
    subdir_name = os.path.basename(subdir)
    print("Subdir: " + subdir_name)

    # Loading the native structure
    native_structure = ampal.load_pdb(subdir + "/" + "native.pdb")

    # Deleting hydrogen atoms
    for residue in native_structure.get_monomers():
        del_keys = []
        for (k, v) in residue.atoms.items():
            if v.element == "H":
                del_keys.append(k)
        for k in del_keys:
            del residue.atoms[k]

    # Renaming the native pdb file to include the pdb id and saving it
    with open(subdir + "/" + subdir_name + "_native.pdb", "w") as outf:
        outf.write(native_structure.pdb)

    # Extracting all the file names of the decoy structures