Esempio n. 1
0
def test_intrachain_inter_residue_atomic_distance():
    """Check the length of the `get_interchain_distance` result on a multi-chain NTL9.

    Despite the "intrachain" in its name, this test calls
    `SSTrajectory.get_interchain_distance` for every unordered pair of chains,
    using one randomly drawn chain-local residue index per chain, and compares
    the length of the returned sequence with the size of the first chain.
    """
    # A custom grouping of NTL9 is needed, since we want multiple protein
    # chains: five chains of ten consecutive residues (0-9, 10-19, ..., 40-49).
    chains = [list(range(first, first + 10)) for first in range(0, 50, 10)]

    data_dir = soursop.get_data('test_data')
    topology = os.path.join(data_dir, 'ntl9.pdb')
    frames = os.path.join(data_dir, 'ntl9.xtc')  # ntl9 has 56 residues
    multi_chain = sstrajectory.SSTrajectory(trajectory_filename=frames,
                                            pdb_filename=topology,
                                            protein_grouping=chains)

    chain_ids = range(len(chains))
    for chain_a, chain_b in itertools.combinations(chain_ids, r=2):
        members_a = chains[chain_a]
        members_b = chains[chain_b]

        # Chain A is drawn before chain B; keep this order so that a seeded
        # RNG produces the same sequence of indices.
        index_a = random.randint(0, len(members_a) - 1)
        index_b = random.randint(0, len(members_b) - 1)

        result = multi_chain.get_interchain_distance(chain_a, chain_b,
                                                     index_a, index_b)
        assert len(result) == len(members_a)
Esempio n. 2
0
def test_get_intra_chain_distance_map_protein_groups_with_residue_indices():
    """Check `get_interchain_distance_map` when explicit residue subsets are supplied.

    For every unordered pair of protein chains, a random sorted subset of
    residues is drawn from each chain, converted to chain-local indices and
    handed to `SSTrajectory.get_interchain_distance_map`. The two returned
    maps (distance and standard deviation) must have the same shape.
    """
    # Note that the residues for the resID1 and resID2 indices are with respect to the residues of the protein chain
    # and not the full protein itself.
    protein_groups_residues = [[0, 1, 2, 3, 4, 5, 6, 7, 8],
                               [10, 11, 12, 13, 14, 15, 16, 17, 18],
                               [20, 21, 22, 23, 24, 25, 26, 27, 28],
                               [30, 31, 32, 33, 34, 35, 36, 37, 38],
                               [40, 41, 42, 43, 44, 45, 46, 47, 48]]
    pdb_filename = os.path.join(soursop.get_data('test_data'), 'ntl9.pdb')
    traj_filename = os.path.join(soursop.get_data('test_data'), 'ntl9.xtc')  # ntl9 has 56 residues
    trajectory = sstrajectory.SSTrajectory(trajectory_filename=traj_filename,
                                           pdb_filename=pdb_filename,
                                           protein_grouping=protein_groups_residues)

    # select pairwise protein ids (no repetitions).
    for protein_a, protein_b in itertools.combinations(range(len(protein_groups_residues)), r=2):
        protein_a_residues = protein_groups_residues[protein_a]
        protein_b_residues = protein_groups_residues[protein_b]

        # now, let's randomly choose a set of residues to select for `protein_a` and `protein_b`
        num_residues_a = random.randint(1, len(protein_a_residues))
        num_residues_b = random.randint(1, len(protein_b_residues))

        # offset by the starting residue number so the indices are chain-local
        a_residues = [r - protein_a_residues[0]
                      for r in sorted(random.sample(protein_a_residues, num_residues_a))]
        b_residues = [r - protein_b_residues[0]
                      for r in sorted(random.sample(protein_b_residues, num_residues_b))]

        # The residue subsets were previously computed but never used; pass
        # them so the test actually exercises the residue-index code path.
        distance_map, stddev_map = trajectory.get_interchain_distance_map(
            protein_a, protein_b, a_residues, b_residues)
        assert distance_map.shape == stddev_map.shape
Esempio n. 3
0
def test_get_interchain_distance_failing_atom():
    """`get_interchain_distance` in 'atom' mode must raise for the atom name 'X'.

    Each atom pair tried contains the name 'X' on one side and 'CA' on the
    other (presumably 'X' is not a real atom name); an `SSException` is
    expected for every chain pair and every atom pair.
    """
    chains = [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9], [10, 11, 12, 13, 14]]
    data_dir = soursop.get_data('test_data')
    topology = os.path.join(data_dir, 'ntl9.pdb')
    frames = os.path.join(data_dir, 'ntl9.xtc')  # ntl9 has 56 residues
    multi_chain = sstrajectory.SSTrajectory(trajectory_filename=frames,
                                            pdb_filename=topology,
                                            protein_grouping=chains)

    mode = 'atom'
    first_atoms = ['X', 'CA']
    second_atoms = list(reversed(first_atoms))
    atom_pairs = list(zip(first_atoms, second_atoms))  # ('X', 'CA'), ('CA', 'X')

    # Residue indices are fixed rather than randomised: there appears to be an
    # issue where R1 > R2 - the calculation crashes for sidechain-heavy.
    # See `test_get_interchain_distance`.
    R1 = 0
    R2 = 1

    for chain_a, chain_b in itertools.combinations(range(len(chains)), r=2):
        for atom_a, atom_b in atom_pairs:
            with pytest.raises(SSException):
                multi_chain.get_interchain_distance(chain_a, chain_b, R1, R2,
                                                    atom_a, atom_b, mode)
Esempio n. 4
0
def test_read_in_compare_trajectories(GS6_CO):
    """GS6 loaded via keyword arguments must match the `GS6_CO` fixture's coordinates."""
    data_dir = soursop.get_data('test_data')
    topology = os.path.join(data_dir, 'gs6.pdb')
    frames = os.path.join(data_dir, 'gs6.xtc')
    loaded = sstrajectory.SSTrajectory(trajectory_filename=frames,
                                       pdb_filename=topology)

    # Compare the raw coordinate arrays within numpy's default tolerances.
    coordinates_match = np.allclose(loaded.traj.xyz, GS6_CO.traj.xyz)
    assert coordinates_match
Esempio n. 5
0
def test_read_in_trajectory_pdblead(GS6_CO):
    """With `pdblead=True` the loaded trajectory must be one frame longer than `GS6_CO`."""
    data_dir = soursop.get_data('test_data')
    topology = os.path.join(data_dir, 'gs6.pdb')
    frames = os.path.join(data_dir, 'gs6.xtc')
    with_lead = sstrajectory.SSTrajectory(pdb_filename=topology,
                                          trajectory_filename=frames,
                                          pdblead=True)

    # Since we're using the PDB as an initial frame, the number of frames
    # should have increased by 1.
    expected_length = len(GS6_CO) + 1
    assert len(with_lead) == expected_length
Esempio n. 6
0
def test_cap_DSSP():
    """
    Tests DSSP (and hence R1/R2 selection) works

    Each capped PDB file is loaded as both trajectory and topology, then DSSP
    is run on every protein chain found. The call must succeed and its first
    returned element must be non-empty.
    """

    test_data_dir = soursop.get_data('test_data')
    pdb_files = ['ACE_NME_multichain', 'ACE_NME_start_at_5']
    for cap_name in pdb_files:
        cap_path = os.path.join(test_data_dir, 'cap_tests', '{}.pdb'.format(cap_name))
        cap_trajectory = sstrajectory.SSTrajectory(cap_path, cap_path)

        # Previously the trajectory was only constructed and DSSP was never
        # invoked, so the behaviour named in the docstring went untested.
        for cap_protein in cap_trajectory.proteinTrajectoryList:
            dssp_result = cap_protein.get_secondary_structure_DSSP()
            assert len(dssp_result[0]) > 0
Esempio n. 7
0
def test_read_in_protein_grouping_multiple():
    """Three explicit residue groups on NTL9 must be reported as three proteins."""
    groups = [[0, 1, 2], [3, 4, 5], [6, 7, 8]]
    data_dir = soursop.get_data('test_data')
    topology = os.path.join(data_dir, 'ntl9.pdb')
    frames = os.path.join(data_dir, 'ntl9.xtc')  # ntl9 has 56 residues
    loaded = sstrajectory.SSTrajectory(trajectory_filename=frames,
                                       pdb_filename=topology,
                                       protein_grouping=groups)

    # one protein is expected per group supplied
    expected_count = len(groups)
    assert loaded.n_proteins == expected_count
Esempio n. 8
0
def test_read_in_protein_grouping_simple():
    """A single one-residue group on GS6 must be reported as exactly one protein."""
    protein_groups = [[0]]
    pdb_filename = os.path.join(soursop.get_data('test_data'), 'gs6.pdb')
    traj_filename = os.path.join(soursop.get_data('test_data'), 'gs6.xtc')
    trajectory = sstrajectory.SSTrajectory(trajectory_filename=traj_filename,
                                           pdb_filename=pdb_filename,
                                           protein_grouping=protein_groups)

    # verify that we have loaded the number of proteins expected; use the same
    # `n_proteins` attribute as the other protein-grouping tests in this file.
    assert trajectory.n_proteins == len(protein_groups)
Esempio n. 9
0
def test_read_in_protein_grouping_invalid_residues():
    """Groups made only of residue indices 1000-1008 must yield zero proteins on NTL9."""
    out_of_range_groups = [[1000, 1001, 1002],
                           [1003, 1004, 1005],
                           [1006, 1007, 1008]]
    data_dir = soursop.get_data('test_data')
    topology = os.path.join(data_dir, 'ntl9.pdb')
    frames = os.path.join(data_dir, 'ntl9.xtc')  # ntl9 has 56 residues
    loaded = sstrajectory.SSTrajectory(trajectory_filename=frames,
                                       pdb_filename=topology,
                                       protein_grouping=out_of_range_groups)

    # Since this is a failing but non-disruptive test (i.e. no Exceptions),
    # we check the number of proteins, which should be 0.
    assert loaded.n_proteins == 0
Esempio n. 10
0
def test_read_in_protein_grouping_multiple_mixed_order():
    """Unsorted, overlapping residue groups are currently accepted as three proteins."""
    # TODO: This test passes - it shouldn't. Look into this further.
    shuffled_groups = [[1, 2, 0], [10, 3, 7], [11, 1, 3]]
    data_dir = soursop.get_data('test_data')
    topology = os.path.join(data_dir, 'ntl9.pdb')
    frames = os.path.join(data_dir, 'ntl9.xtc')  # ntl9 has 56 residues
    loaded = sstrajectory.SSTrajectory(trajectory_filename=frames,
                                       pdb_filename=topology,
                                       protein_grouping=shuffled_groups)

    # one protein is expected per group supplied
    expected_count = len(shuffled_groups)
    assert loaded.n_proteins == expected_count
Esempio n. 11
0
def test_cap_BBSEG():
    """
    Tests BBSEG (and hence R1/R2 selection) works

    Every capped test structure named in `CAPNAMES` is loaded as both
    trajectory and topology, and BBSEG is evaluated on each protein chain.
    """

    for CN in CAPNAMES:
        cap_pdb = "%s/cap_tests/%s.pdb" % (test_data_dir, CN)
        CT = sstrajectory.SSTrajectory(cap_pdb, cap_pdb)
        for CP in CT.proteinTrajectoryList:
            # This test previously called `get_secondary_structure_DSSP`,
            # duplicating `test_cap_DSSP` and leaving BBSEG untested.
            # NOTE(review): the expected length of 6 is carried over from the
            # DSSP variant - confirm it also holds for the BBSEG output.
            assert len(CP.get_secondary_structure_BBSEG()[0]) == 6
Esempio n. 12
0
def test_get_intra_chain_distance_map_protein_groups():
    """Distance and stddev maps between chain pairs must share a shape and contain no zeros."""
    chains = [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9], [10, 11, 12, 13, 14]]
    data_dir = soursop.get_data('test_data')
    topology = os.path.join(data_dir, 'ntl9.pdb')
    frames = os.path.join(data_dir, 'ntl9.xtc')  # ntl9 has 56 residues
    multi_chain = sstrajectory.SSTrajectory(trajectory_filename=frames,
                                            pdb_filename=topology,
                                            protein_grouping=chains)

    chain_ids = range(len(chains))
    for chain_a, chain_b in itertools.combinations(chain_ids, r=2):
        mean_map, spread_map = multi_chain.get_interchain_distance_map(chain_a, chain_b)
        assert mean_map.shape == spread_map.shape

        # every entry should be non-zero since the residue indices are unique
        total_entries = mean_map.size
        assert np.count_nonzero(mean_map) == total_entries
Esempio n. 13
0
def test_cap():
    """
    Test caps assignment works

    For each structure in `CAPNAMES`, a chain flagged with a C-terminal cap
    must come from a structure listed in `CCAP`, and a chain flagged with an
    N-terminal cap must come from a structure listed in `NCAP`.
    """

    for CN in CAPNAMES:
        cap_pdb = "%s/cap_tests/%s.pdb" % (test_data_dir, CN)
        CT = sstrajectory.SSTrajectory(cap_pdb, cap_pdb)
        for CP in CT.proteinTrajectoryList:
            # Implication form: a set cap flag requires membership in the
            # corresponding list; an unset flag imposes no requirement.
            assert not CP.ccap or CN in CCAP
            assert not CP.ncap or CN in NCAP
Esempio n. 14
0
def test_get_interchain_distance_invalid_protein_ids():
    """`get_interchain_distance` must raise `SSException` for protein ids 100 and 101 in every mode.

    Only three protein groups are defined, so neither id refers to a loaded chain.
    """
    chains = [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9], [10, 11, 12, 13, 14]]
    data_dir = soursop.get_data('test_data')
    topology = os.path.join(data_dir, 'ntl9.pdb')
    frames = os.path.join(data_dir, 'ntl9.xtc')  # ntl9 has 56 residues
    multi_chain = sstrajectory.SSTrajectory(trajectory_filename=frames,
                                            pdb_filename=topology,
                                            protein_grouping=chains)

    residue_pair = (0, 1)
    atom_pair = ('CA', 'CA')
    missing_a, missing_b = 100, 101

    for mode in 'atom,ca,closest,closest-heavy,sidechain,sidechain-heavy'.split(','):
        with pytest.raises(SSException):
            multi_chain.get_interchain_distance(missing_a, missing_b,
                                                *residue_pair, *atom_pair, mode)
Esempio n. 15
0
def test_get_interchain_distance():
    """`get_interchain_distance` must return a non-empty result for every chain pair and mode."""
    chains = [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9], [10, 11, 12, 13, 14]]
    data_dir = soursop.get_data('test_data')
    topology = os.path.join(data_dir, 'ntl9.pdb')
    frames = os.path.join(data_dir, 'ntl9.xtc')  # ntl9 has 56 residues
    multi_chain = sstrajectory.SSTrajectory(trajectory_filename=frames,
                                            pdb_filename=topology,
                                            protein_grouping=chains)

    # Residue indices are fixed rather than randomised: there appears to be an
    # issue where R1 > R2 - the calculation crashes for sidechain-heavy.
    R1 = 0
    R2 = 1
    A1 = 'CA'
    A2 = 'CA'
    modes = 'atom,ca,closest,closest-heavy,sidechain,sidechain-heavy'.split(',')

    chain_pairs = itertools.combinations(range(len(chains)), r=2)
    for (chain_a, chain_b), mode in itertools.product(chain_pairs, modes):
        result = multi_chain.get_interchain_distance(chain_a, chain_b,
                                                     R1, R2, A1, A2, mode)
        assert len(result) > 0
Esempio n. 16
0
def test_get_interchain_distance_invalid_mode():
    """`get_interchain_distance` must raise `SSException` when given the mode 'unknown'."""
    chains = [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9], [10, 11, 12, 13, 14]]
    data_dir = soursop.get_data('test_data')
    topology = os.path.join(data_dir, 'ntl9.pdb')
    frames = os.path.join(data_dir, 'ntl9.xtc')  # ntl9 has 56 residues
    multi_chain = sstrajectory.SSTrajectory(trajectory_filename=frames,
                                            pdb_filename=topology,
                                            protein_grouping=chains)

    bad_mode = 'unknown'

    # Residue indices are fixed rather than randomised: there appears to be an
    # issue where R1 > R2 - the calculation crashes for sidechain-heavy.
    R1 = 0
    R2 = 1
    A1 = 'CA'
    A2 = 'CA'

    for chain_a, chain_b in itertools.combinations(range(len(chains)), r=2):
        with pytest.raises(SSException):
            multi_chain.get_interchain_distance(chain_a, chain_b,
                                                R1, R2, A1, A2, bad_mode)
Esempio n. 17
0
def test_get_interchain_distance_failing_residues():
    """`get_interchain_distance` must raise `SSException` when one residue index is 100.

    Each chain holds five residues; the residue pairs tried are (0, 100) and
    (100, 0), for every chain pair and every mode.
    """
    chains = [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9], [10, 11, 12, 13, 14]]
    data_dir = soursop.get_data('test_data')
    topology = os.path.join(data_dir, 'ntl9.pdb')
    frames = os.path.join(data_dir, 'ntl9.xtc')  # ntl9 has 56 residues
    multi_chain = sstrajectory.SSTrajectory(trajectory_filename=frames,
                                            pdb_filename=topology,
                                            protein_grouping=chains)

    residues = [0, 100]
    residue_pairs = list(zip(residues, reversed(residues)))  # (0, 100), (100, 0)
    A1 = 'CA'
    A2 = 'CA'
    modes = 'atom,ca,closest,closest-heavy,sidechain,sidechain-heavy'.split(',')

    chain_pairs = itertools.combinations(range(len(chains)), r=2)
    # product() iterates in the same order as the equivalent nested loops:
    # chain pair outermost, then mode, then residue pair.
    for (chain_a, chain_b), mode, (R1, R2) in itertools.product(chain_pairs, modes, residue_pairs):
        with pytest.raises(SSException):
            multi_chain.get_interchain_distance(chain_a, chain_b,
                                                R1, R2, A1, A2, mode)
Esempio n. 18
0
def GMX_2CHAINS(request):
    """Build an `SSTrajectory` from the two files named in `GROMACS_2_CHAINS`.

    Entry 1 is passed as the first positional argument and entry 0 as the
    second; `request` is accepted but not used.
    """
    first_path = "%s/%s" % (test_data_dir, GROMACS_2_CHAINS[1])
    second_path = "%s/%s" % (test_data_dir, GROMACS_2_CHAINS[0])
    return sstrajectory.SSTrajectory(first_path, second_path)
Esempio n. 19
0
def test_read_in_no_topology_xtc():
    """Loading an `.xtc` trajectory without a PDB file must raise `SSException`."""
    data_dir = soursop.get_data('test_data')
    xtc_only = {'trajectory_filename': os.path.join(data_dir, 'gs6.xtc')}
    with pytest.raises(SSException):
        sstrajectory.SSTrajectory(**xtc_only)
Esempio n. 20
0
def NTL9_CO(request):
    """Return an `SSTrajectory` for NTL9 built from the files listed in `NTL9_FILES`.

    `NTL9_FILES[0]` is used as the topology and `NTL9_FILES[1]` as the
    trajectory; `request` is accepted but not used.
    """
    topology_name = NTL9_FILES[0]
    trajectory_name = NTL9_FILES[1]
    return sstrajectory.SSTrajectory(
        os.path.join(test_data_dir, trajectory_name),
        os.path.join(test_data_dir, topology_name),
    )
Esempio n. 21
0
def NTL9_CP(request):
    """Return the first protein of the NTL9 trajectory built from `NTL9_FILES`.

    `NTL9_FILES[0]` is used as the topology and `NTL9_FILES[1]` as the
    trajectory; `request` is accepted but not used.
    """
    topology = os.path.join(test_data_dir, NTL9_FILES[0])
    frames = os.path.join(test_data_dir, NTL9_FILES[1])
    full_trajectory = sstrajectory.SSTrajectory(frames, topology)
    # only the first entry of the protein list is handed to the tests
    return full_trajectory.proteinTrajectoryList[0]
Esempio n. 22
0
def GS6_CP(request):
    """Return the first protein of the GS6 trajectory built from `GS6_FILES`.

    `GS6_FILES[0]` is used as the topology and `GS6_FILES[1]` as the
    trajectory; `request` is accepted but not used.
    """
    topology = os.path.join(test_data_dir, GS6_FILES[0])
    frames = os.path.join(test_data_dir, GS6_FILES[1])
    full_trajectory = sstrajectory.SSTrajectory(frames, topology)
    # only the first entry of the protein list is handed to the tests
    return full_trajectory.proteinTrajectoryList[0]
Esempio n. 23
0
def GS6_CO(request):
    """Return an `SSTrajectory` for GS6 built from the files listed in `GS6_FILES`.

    `GS6_FILES[0]` is used as the topology and `GS6_FILES[1]` as the
    trajectory; `request` is accepted but not used.
    """
    topology_name = GS6_FILES[0]
    trajectory_name = GS6_FILES[1]
    return sstrajectory.SSTrajectory(
        os.path.join(test_data_dir, trajectory_name),
        os.path.join(test_data_dir, topology_name),
    )
Esempio n. 24
0
def test_read_in_no_trajectory_and_no_topology():
    """Constructing an `SSTrajectory` with no arguments at all must raise `SSException`."""
    no_arguments = {}
    with pytest.raises(SSException):
        sstrajectory.SSTrajectory(**no_arguments)
Esempio n. 25
0
def test_read_in_no_trajectory_and_no_topology_but_use_custom_trajectory(
        GS6_CO):
    """An `SSTrajectory` can be built from an existing trajectory object via `TRJ`.

    No file names are supplied; the test passes if construction does not raise.
    """
    sstrajectory.SSTrajectory(TRJ=GS6_CO.traj)
Esempio n. 26
0
def test_trajectory_initialization_debug():
    """Constructing an `SSTrajectory` for GS6 with `debug=True` must not raise."""
    data_dir = soursop.get_data('test_data')
    topology = os.path.join(data_dir, 'gs6.pdb')
    frames = os.path.join(data_dir, 'gs6.xtc')
    sstrajectory.SSTrajectory(pdb_filename=topology,
                              trajectory_filename=frames,
                              debug=True)
Esempio n. 27
0
def test_read_in_no_trajectory():
    """Supplying only a PDB file, with no trajectory file, must raise `SSException`."""
    data_dir = soursop.get_data('test_data')
    pdb_only = {'pdb_filename': os.path.join(data_dir, 'gs6.pdb')}
    with pytest.raises(SSException):
        sstrajectory.SSTrajectory(**pdb_only)
Esempio n. 28
0
def test_read_in_no_topology_dcd():
    """Loading a `.dcd` trajectory without a PDB file must raise `SSException`.

    The `SSTrajectory.__init__` references that `.dcd` files are supported too.
    """
    data_dir = soursop.get_data('test_data')
    dcd_only = {'trajectory_filename': os.path.join(data_dir, 'gs6.dcd')}
    with pytest.raises(SSException):
        sstrajectory.SSTrajectory(**dcd_only)