Example #1
0
def is_pistack(sring, tring, pistack_dist_max=0.55, pistack_ang_dev=30.0, min_dist=0.05, pistack_offset_max=0.20):
    """
    Compute if a pi- or T-stacking interaction could exist between two
    aromatic rings.

    Rules:
    1) The distance between the two ring centers should be larger than
       `min_dist` and smaller than `pistack_dist_max`.
    2) pi-stacking ('ps'): the angle between the ring normals (smallest of
       the angle and 180 - angle) is smaller than `pistack_ang_dev` and the
       ring offset is smaller than `pistack_offset_max`. Perfectly parallel
       rings (angle of exactly 0 degrees) are included.
    3) T-stacking ('ts'): the angle between the ring normals is within
       `pistack_ang_dev` of 90 degrees and the ring offset is smaller than
       `pistack_offset_max`.

    The ring offset is the smallest of the two distances obtained by
    projecting each ring center onto the plane of the other ring and
    measuring the distance to that ring's center.

    :param sring:               Atom selection of the source aromatic ring
    :param tring:               Atom selection of the target aromatic ring
    :param pistack_dist_max:    maximum distance between ring centers
                                (reported as nm in the log message)
    :type pistack_dist_max:     :py:float
    :param pistack_ang_dev:     maximum angle deviation (degrees) from the
                                ideal parallel (0) or perpendicular (90)
                                orientation of the ring normals
    :type pistack_ang_dev:      :py:float
    :param min_dist:            minimum distance between ring centers
    :type min_dist:             :py:float
    :param pistack_offset_max:  maximum offset between the ring centers
    :type pistack_offset_max:   :py:float

    :return:                    stacking interaction or not + data. The data
                                dictionary always has a 'type' key (None,
                                'ps' or 'ts'); the geometry metrics are only
                                added when rule 1 is satisfied.
    :rtype:                     :py:bool, :py:dict
    """

    pistack = False
    pistack_data = {'type': None}

    # Calculate source ring geometric center and normal to it
    sring_center = sring.center()
    sring_normal = plane_fit(sring.coord, center=sring_center)

    # Calculate target ring geometric center and (inverted) normal to it
    tring_center = tring.center()
    tring_normal = plane_fit(tring.coord, center=tring_center) * -1

    # Calculate distance between ring centers
    dist = distance(sring_center, tring_center)

    # Rule 1: distance between ring centers
    if not min_dist < dist < pistack_dist_max:
        return pistack, pistack_data

    # Calculate ring offset, (project each ring center into the other ring)
    proj1 = projection(sring_normal, sring_center, tring_center)
    proj2 = projection(tring_normal, tring_center, sring_center)
    offset = min(distance(proj1, sring_center), distance(proj2, tring_center))

    # Calculate angle between normals and fold it so that the direction of
    # the normals does not matter (smallest of a and 180 - a).
    a = vector_angle(sring_normal, tring_normal, deg=True)
    if a <= 180:
        a = min(a, 180 - a)

    pistack_data.update({'distance': dist, 'angle': a, 'offset': offset, 'sring_center': sring_center,
                         'tring_center': tring_center, 'sring_normal': sring_normal,
                         'tring_normal': tring_normal, 'sring': tuple(sring['serial']),
                         'tring': tuple(tring['serial'])})

    within_offset = offset < pistack_offset_max

    # Rule 2: pi-stacking. The lower bound is inclusive: an angle of exactly
    # 0 degrees (perfectly parallel rings) is the ideal stacking geometry.
    if 0 <= a < pistack_ang_dev and within_offset:
        pistack_data['type'] = 'ps'
        pistack = True

    # Rule 3: T-stacking
    elif 90 - pistack_ang_dev < a < 90 + pistack_ang_dev and within_offset:
        pistack_data['type'] = 'ts'
        pistack = True

    if pistack:
        logger.info('Pi-stacking type: {0} between {1}-{2} {3} and {4}-{5} {6}'.format(pistack_data['type'],
                                                                                     set(sring['resSeq']),
                                                                                     set(sring['resName']),
                                                                                     tuple(sring['serial']),
                                                                                     set(tring['resSeq']),
                                                                                     set(tring['resName']),
                                                                                     tuple(tring['serial'])))
        logger.info('Distance: {distance:.3f} nm, angle: {angle:.2f} deg, offset: {offset:.3f} nm'.format(
            **pistack_data))

    return pistack, pistack_data
Example #2
0
def is_pication(cation, ring, min_dist=0.05, pication_dist_max=0.6, pication_offset_max=0.2,
                pication_amine_angle_dev=30.0):
    """
    Compute if a pi-cation interaction could exist between a cation and an
    aromatic ring

    Rules for pi-Cation interactions:
    1) Distance between cation and aromatic ring center should be between
       `min_dist` and `pication_dist_max`
    2) The distance offset between the ring center and the cation after
       projection onto the ring plane should not be more than
       `pication_offset_max`.
    3) If the cation is an amine that is linked with the remainder of the
       residue through more than one covalent bond (less degrees of freedom),
       the ring center should be positioned above the amine valence electrons
       computed as the angle between the normal of the ring and the normal
       defined by the heavy atom neighbours of the amine nitrogen that should
       be no more than `pication_amine_angle_dev`

    The function returns a boolean indicating if there is a pi-cation
    interaction and a dictionary with the computed metrics.
    notably: ring_center, ring_normal, distance, offset and cat_angle and
    cat_normal if it concerns an amine.

    :param cation:                    Cation atom selection
    :type cation:                     :interact:TopologyDataFrame,
                                      :interact:TopologySeries
    :param ring:                      Atom selection of the aromatic ring
    :type ring:                       :interact:TopologyDataFrame
    :param min_dist:                  minimum interaction distance (nm)
    :type min_dist:                   :py:float
    :param pication_dist_max:         maximum distance for pi-cation
                                      interactions (nm)
    :type pication_dist_max:          :py:float
    :param pication_offset_max:       Cutoff distance between geometric centers
    :type pication_offset_max:        :py:float
    :param pication_amine_angle_dev:  Maximum angle deviation (degrees)
                                      between amine and ring normals.
    :type pication_amine_angle_dev:   :py:float

    :return:                          pi-cation interaction or not + data
    :rtype:                           :py:bool, py:dict
    """

    # Calculate distance between cation and ring center.
    # Calculate offset between ring center and cation projected onto ring plane
    ring_center = ring.center()
    ring_normal = plane_fit(ring.coord, center=ring_center)

    pcdist = distance(cation.coord, ring_center)
    pcoffset = distance(projection(ring_normal, ring_center, cation.coord), ring_center)

    data = {'ring_center': ring_center, 'ring_normal': ring_normal, 'distance': pcdist, 'offset': pcoffset}

    # Rule 1 and 2: distance and offset criteria
    ispicat = min_dist < pcdist < pication_dist_max and pcoffset < pication_offset_max

    # Rule 3: If it concerns a tertiary or quaternary amine, check angles.
    # Otherwise, we might have a pi-cation interaction 'through' the ligand
    if ispicat and cation.attype in ('N.3', 'N.4'):
        neigh = cation.neighbours(covalent=True)
        nonh = neigh[neigh['attype'] != 'H']

        # Count number of heavy atom neighbours that themselves are linked
        links = sum(len(n.neighbours(covalent=True)) - 1 for _, n in nonh.iterrows())

        if len(nonh) > 2 and links >= 2:

            # Calculate normal to plane defined by covalent neighbours of cation
            # Calculate angle between ring and cation normal. The angle is
            # requested in degrees explicitly (as the other vector_angle
            # callers in this module do) because it is compared to 180 and
            # to pication_amine_angle_dev below.
            cation_normal = plane_fit(nonh.coord, center=cation.coord)
            cation_angle = vector_angle(ring_normal, cation_normal, deg=True)
            if cation_angle <= 180:
                cation_angle = min(cation_angle, 180 - cation_angle)

            data['cat_angle'] = cation_angle
            data['cat_normal'] = cation_normal

            # Vector angle should not deviate more than pication_amine_angle_dev
            if not cation_angle < pication_amine_angle_dev:
                ispicat = False

            logger.debug('Cation likely an amine. Angle to ring normal: {0:.2f} deg.'.format(cation_angle))

    if ispicat:
        # Item access is used for the atom name: on a pandas Series the
        # `.name` attribute is the Series label, not the 'name' field.
        logger.info('Cation-pi interaction between {0}-{1} and ring {2}-{3}. Distance: {4:.3f} nm'
                    ' Offset:{5:.2f} nm'.format(cation.resName, cation['name'], ring['resName'].values[0],
                                               ring['resSeq'].values[0], pcdist, pcoffset))

    return ispicat, data
Example #3
0
def eval_saltbridge(contact_frame, topology, max_charge_dist=0.55):
    """
    Evaluate contacts between centers of positive and negative charge.
    Physiological relevant pH is assumed.

    Algorithm:
    1) Primary selection is between all source and target atoms that are
       max_charge_dist apart according to (Barlow and Thornton, 1983) +
       0.15 nm
    2) Select all residues in previous selection that have a formal
       positive or negative charge according to the sum of partial charges
       in the 'charge' column. The latter charges are Gasteiger partial
       charges by default.
    3) Select all atoms that are likely a part of the charged group in
       the residues from step 2 based on SYBYL atom types following:

    amino-acid                  type   atom         charge
    --------------------------------------------------------------------
    Arginine - Arg - R          N.pl3  RNHC(NH2)2+  +
    Lysine - Lys - K            N.4    RNH3         +
    Histidine - His - H         N.ar   ND1, NE2     +
    Aspartic acid - Asp - D     O.co2  RCOO-        -
    Glutamic acid - Glu - E     O.co2  RCOO-        -

    Ligands                     type   atom         charge
    --------------------------------------------------------------------
    quaternary ammonium         N.4                 +
    tertiary amines             N.am                +
    sulfonium groups            S.3                 +
    guanidine groups            C.cat               +
    phosphate                   O.co2  PO4          -
    sulfonate                   S.3    RSO2O-       -
    sulfonic acid               S.O2                -
    carboxylate                 O.co2               -

    4) Select neighbouring atoms not of element type 'C' or 'H' in the
       selection from step 3 to define all atoms part of the charged group
    5) Evaluate salt-bridges by the distance between the geometrical centers
       of two charged groups of opposite sign being smaller or equal to
       max_charge_dist.

    Although multiple atoms of both charged groups take part in the salt-bridge
    only the pair with the shortest atom-atom distance is reported using the
    labels:

    - 'sb-pn': for a positive charged source to negative target contact.
    - 'sb-np': for a negative charged source to positive target contact.

    Because salt-bridges are composed of hydrogen-bonded and charged
    interactions, the reported atom pair often was reported before as taking
    part in a hydrogen-bonded interaction when the `eval_hbonds` function was
    used. The salt-bridge label will be added to the contact column maintaining
    the hydrogen bond label.

    :param contact_frame:      contact DataFrame
    :type contact_frame:       :py:DataFrame
    :param topology:           Pandas DataFrame representing the structure
    :type topology:            :interact:TopologyDataFrame
    :param max_charge_dist:    maximum distance cutoff between charge centers
    :type max_charge_dist:     :py:float

    :return:                   Adds the labels 'sb-np' or 'sb-pn' to the
                               'contact' column of the input contact frame.
    :rtype:                    :pandas:DataFrame
    """

    # Preselect all contacts below max_charge_dist
    chdist = contact_frame[contact_frame['target', 'distance'] <= max_charge_dist]

    # Select all charged source and target residues. Every charge center is
    # stored as (sign, atom selection) with sign 'n' for a charge <= -1 and
    # 'p' otherwise.
    charged_groups = {'source': [], 'target': []}
    for group, centers in charged_groups.items():
        selection = topology[topology['serial'].isin(chdist[group, 'serial'])]
        for charge_group in selection.find_charged_centers():
            sign = 'n' if charge_group[1] <= -1 else 'p'
            centers.append((sign, charge_group[0]))

    if not charged_groups['source'] or not charged_groups['target']:
        logger.info('Not running salt-bridge detection. Charged groups in source: {0}, target: {1}'.format(
            len(charged_groups['source']), len(charged_groups['target'])))
        return contact_frame

    logger.info(
        "Run salt-bridge detection on {0} source and {1} target charged groups using: max_charge_dist={2}".format(
            len(charged_groups['source']), len(charged_groups['target']), max_charge_dist))

    # Loop over combinations of source and target charged groups
    for s, l in itertools.product(charged_groups['source'], charged_groups['target']):

        # Only groups of opposite sign can form a salt-bridge
        if s[0] == l[0]:
            continue

        # The distance criterion (step 5) guards both the log message and the
        # labelling of the contact: a pair of charged groups further apart
        # than max_charge_dist is not a salt-bridge and must not be labelled.
        center_distance = distance(s[1].center(), l[1].center())
        if not center_distance <= max_charge_dist:
            continue

        sb_type = 'sb-{0}{1}'.format(s[0], l[0])
        source_center = repr(list(s[1]['serial'])).strip('[]')
        target_center = repr(list(l[1]['serial'])).strip('[]')
        logger.info('{0} between {1}-{2} and {3}-{4}. D: {5:.3f} nm between groups {6} and {7}'.format(
            sb_type, s[1]['resSeq'].unique()[0], s[1]['resName'].unique()[0], l[1]['resSeq'].unique()[0],
            l[1]['resName'].unique()[0], center_distance, source_center, target_center))

        # Report salt-bridge on the atom pair with the shortest atom-atom
        # distance between the two charged groups
        sb_selection = contact_frame[(contact_frame['source', 'serial'].isin(s[1]['serial'])) &
                                     (contact_frame['target', 'serial'].isin(l[1]['serial']))]
        if sb_selection.empty:
            # No atom pair of these two groups in the contact frame to label
            continue

        report_to = sb_selection.sort_values(by=('target', 'distance')).head(n=1)
        contact_frame.loc[report_to.index, 'contact'] = set_contact_type(
            contact_frame.loc[report_to.index, 'contact'], sb_type)

    return contact_frame
Example #4
0
def eval_heme_coordination(contact_frame, topology, rings=None, heme_dist_prefilter=0.55, heme_dist_max=0.35,
                           heme_dist_min=0, min_heme_coor_angle=105, max_heme_coor_angle=160, fe_ox_dist=0.16,
                           exclude=('H', 'O.3', 'O.2', 'O.co2', 'O.spc', 'O.t3p', 'C.cat', 'S.o2')):
    """
    Evaluate heme coordination of ligand atoms

    Algorithm:
    1) Select all contacts with a target atom named 'FE' closer than
       `heme_dist_prefilter` for which the source atom type is not in
       `exclude`.
    2) Get the HEM Fe atom and the four HEM nitrogen atoms within 0.3 of it.
       The mean of the cross products of successive N -> Fe vectors is used
       as the heme plane normal.
    3) Use the HEM oxygen within 0.2 of the Fe atom if there is exactly one,
       else place a dummy oxygen at `fe_ox_dist` from Fe along the normal.
    4) For every ring in `rings` with its center closer than
       `heme_dist_prefilter` to Fe, compute the angle between the ring
       normal and the heme normal.
    5) For every preselected source atom:
       - label as 'hc' (heme coordination) if it is of type N.ar, N.2 or N.3,
         closer than 0.35 to Fe, with an offset to the heme normal below 0.1
         and a ring/heme normal angle between 45 and 95 degrees (90 is used
         if the atom is not part of a ring from step 4).
       - apply knowledge based filters on the covalent neighbours and label
         as 'hm' (possible site of metabolism) if the Fe-O-atom angle is
         between `min_heme_coor_angle` and `max_heme_coor_angle` and the
         distance to the (dummy) oxygen is between `heme_dist_min` and
         `heme_dist_max`.

    :param contact_frame:        contact DataFrame
    :param topology:             Pandas DataFrame representing the structure
    :param rings:                atom selections of (aromatic) rings to
                                 consider, none by default
    :param heme_dist_prefilter:  distance prefilter for atom-Fe contacts and
                                 ring center-Fe distances
    :param heme_dist_max:        maximum distance between atom and (dummy)
                                 oxygen for a site of metabolism
    :param heme_dist_min:        minimum distance between atom and (dummy)
                                 oxygen for a site of metabolism
    :param min_heme_coor_angle:  minimum Fe-O-atom angle
    :param max_heme_coor_angle:  maximum Fe-O-atom angle
    :param fe_ox_dist:           Fe to dummy oxygen distance (logged as nm)
    :param exclude:              source atom types excluded from evaluation

    :return:                     the input contact frame with 'hc' and/or
                                 'hm' added to the 'contact' column and the
                                 ('target', 'angle') column set for labelled
                                 contacts.
    """

    rings = rings or []

    # Select all atoms within heme_dist_prefilter distance from Fe excluding atoms in exclude list
    fedist = contact_frame[(contact_frame['target', 'name'] == 'FE') &
                           (~contact_frame['source', 'attype'].isin(exclude)) &
                           (contact_frame['target', 'distance'] < heme_dist_prefilter)]
    if fedist.empty:
        return contact_frame

    # Get Fe atom
    fe = topology[(topology['resName'] == 'HEM') & (topology['name'] == 'FE')]
    if fe.empty:
        logger.warning("Unable to asses heme coordination. Fe atom not found")
        return contact_frame

    # Get four nitrogen atoms coordinating the Fe atom
    fe_neigh = fe.neighbours(cutoff=0.3)
    fe_coordinating = fe_neigh[(fe_neigh['resName'] == 'HEM') & (fe_neigh['element'] == 'N')].sort_values(by='name')
    if len(fe_coordinating) != 4:
        logger.warning("Unable to asses heme coordination. Found {0} nitrogen atoms coordinating Fe. Expected 4".format(
            len(fe_coordinating)))
        return contact_frame

    logger.debug("Run heme coordination detection on {0} possible contacts using: heme_dist_prefilter={1:.2f}, "
                 "heme_dist_min={2:.2f}, heme_dist_max={3:.2f}, min_heme_coor_angle={4:.2f}, max_heme_coor_angle={5:.2f}, "
                 "fe_ox_dist={6:.2f}".format(fedist.shape[0], heme_dist_prefilter, heme_dist_min, heme_dist_max,
                                             min_heme_coor_angle, max_heme_coor_angle, fe_ox_dist))

    # Calculate normals between Nitrogens -> Fe vectors
    fe_coor = fe.coord
    n_coor = fe_coordinating.coord - fe_coor

    m1 = numpy.cross(n_coor[0], n_coor[1])
    m2 = numpy.cross(n_coor[1], n_coor[2])
    m3 = numpy.cross(n_coor[2], n_coor[3])
    m4 = numpy.cross(n_coor[3], n_coor[0])

    # Mean of the four normals, used as the heme plane normal
    mv = numpy.mean(numpy.vstack((m1, m2, m3, m4)), axis=0)

    # Is there an Oxygen above the heme (complex I) or do we need to place a dummy
    close_fe_neigh = fe.neighbours(cutoff=0.2)
    dummyox = close_fe_neigh[(close_fe_neigh['resName'] == 'HEM') & (close_fe_neigh['element'] == 'O')]
    if len(dummyox) == 1:
        dummyox = dummyox.coord
        logger.info('Oxygen atom bonded to Fe (complex I)')

    else:
        # Calculate dummy O atom from the average of the four normals
        # Normalize normal mean, change vector size to fe_ox_dist and set point
        dummyox = ((mv / numpy.linalg.norm(mv)) * fe_ox_dist) + fe_coor
        # ravel: the position may be a 1 x 3 array, format needs scalars
        logger.info("Reconstructed oxygen atom placed {0}nm above Heme Fe at position {1}".format(fe_ox_dist, ' '.join(
            ['{0:.3f}'.format(c) for c in numpy.ravel(dummyox)])))

    # Check the coordination of the Fe atom by the SG atom of the Cys below Heme
    sg = fe_neigh[(fe_neigh['resName'] == 'CYS') & (fe_neigh['name'] == 'SG')]
    if not sg.empty:
        sg_angle = angle(dummyox, fe_coor, sg.coord)
        if not 160 < sg_angle < 200:
            logger.warning("Angle between reconstructed oxygen -> Fe -> Cys SG has unusual value {0:.3f}".format(sg_angle))
    else:
        logger.warning("No CYS SG atom in a distance of 0.3nm of the Heme Fe atom")

    # Check if there are rings with their center below heme_dist_prefilter from heme FE.
    # Calculate ring normals. Each entry: (center, normal, angle to heme normal, topology indices)
    ring_normals = []
    for aromatic in rings:
        aromatic_center = aromatic.center()
        aromatic_fe_dist = distance(fe_coor, aromatic_center)
        if aromatic_fe_dist < heme_dist_prefilter:
            aromatic_norm = plane_fit(aromatic.coord, center=aromatic_center)
            aromatic_norm_angle = vector_angle(aromatic_norm, mv, deg=True)
            aromatic_norm_angle = min(aromatic_norm_angle,
                                      180 - aromatic_norm_angle if not 180 - aromatic_norm_angle < 0 else
                                      aromatic_norm_angle)

            ring = aromatic.index.tolist()
            ring_normals.append((aromatic_center, aromatic_norm, aromatic_norm_angle, ring))

            logger.info("Ring {0} close to heme Fe: distance center-Fe {1:.2f}nm, normal angle heme plane-ring:"
                        "{2:.2f} deg.".format(ring, aromatic_fe_dist, aromatic_norm_angle))

    # Get ligand atoms coordinated
    for idx, n in fedist.iterrows():

        source = topology[topology.index == n['source', 'index']]
        source_atom_type = n['source', 'attype']
        z = source.coord

        # Check for heme coordination by aromatic nitrogens. label as 'hc'
        if source_atom_type in ('N.ar', 'N.2', 'N.3'):
            ar_norm_angle = 90
            for ring in ring_normals:
                if n['source', 'index'] in ring[-1]:
                    ar_norm_angle = ring[2]
                    break
            fe_dist = distance(z, fe_coor)
            fe_offset = distance(projection(mv, fe_coor, z), fe_coor)
            if 45 < ar_norm_angle < 95 and fe_dist < 0.35 and fe_offset < 0.1:
                contact_frame.loc[idx, 'contact'] = set_contact_type(contact_frame.loc[idx, 'contact'], 'hc')
                contact_frame.loc[idx, ('target', 'angle')] = ar_norm_angle
                logger.info(
                    "Heme Fe coordination with {0} {1}. Distance: {2:.2f} A. offset: {3:.2f} A plane normal angle: {4:.2f}".format(
                        n['source', 'serial'],
                        n['source', 'name'], fe_dist, fe_offset, ar_norm_angle))

        # Check for possible sites of metabolism and label as 'hm'.
        # Filter on covalent neighbours and apply knowledge based rules.
        if source_atom_type in ('C.2', 'C.3', 'C.ar', 'N.1', 'N.2', 'N.4', 'N.pl3', 'S.3'):
            cutoff = 0.18 if source_atom_type == 'S.3' else 0.16
            neigh = source.neighbours(cutoff=cutoff)
            neigh_atom_types = set(neigh['attype'])

            # If ligand atom is of type C.3 or C.ar it should contain at least one covalently bonded atom
            # of type ['H','Cl','I','Br','F','Hal']
            if source_atom_type in ('C.3', 'C.ar') and not neigh_atom_types.intersection(
                    {'H', 'Cl', 'I', 'Br', 'F', 'Hal'}):
                logger.debug(
                    "Ligand target atom {0}-{1} excluded. Atom type {2} not covalently bonded to: H,Cl,I,Br,F or Hal".format(
                        n['source', 'serial'], n['source', 'name'], source_atom_type))
                continue

            # If ligand atom is of type N.4 it should contain at least one covalently bonded atom of type H
            if source_atom_type == 'N.4' and 'H' not in neigh_atom_types:
                logger.debug(
                    "Ligand target atom {0}-{1} excluded. Atom type N.4 not covalently bonded to hydrogen".format(
                        n['source', 'serial'], n['source', 'name']))
                continue

            # Exclude carbons that are a part of ketone or carboxylate.
            # Parentheses are required: `and` binds tighter than `or`, without
            # them every atom type with an O.co2 neighbour would be excluded.
            if source_atom_type == 'C.2' and ('O.2' in neigh_atom_types or 'O.co2' in neigh_atom_types):
                logger.debug(
                    "Ligand target atom {0}-{1} excluded. Atom type C.2 part of ketone or carboxylate group.".format(
                        n['source', 'serial'], n['source', 'name']))
                continue

            # Additional check on S.O2 wrongly labeled as S.3 (PLANTS?)
            if source_atom_type == 'S.3' and neigh[neigh['attype'] == 'O.2'].shape[0] == 2:
                logger.debug(
                    "Ligand target atom {0}-{1} excluded. Atom labeled as S.3 but probably S.O2 as it is covalently bonded to two O.2".format(
                        n['source', 'serial'], n['source', 'name']))
                continue

            # If N.pl3 or N.2 check for nitro- or nitrate group.
            if source_atom_type in ('N.pl3', 'N.2') and (
                    'O.co2' in neigh_atom_types or len(neigh_atom_types.intersection({'O.2', 'O.3'})) == 2):
                logger.debug("Ligand target atom {0}-{1} excluded. Atom type {2}, exclude nitro- nitrate group".format(
                    n['source', 'serial'], n['source', 'name'], source_atom_type))
                continue

            # Exclude (iso)-nitrile group (equality, not a substring test)
            if source_atom_type == 'N.1' and 'C.1' in neigh_atom_types:
                logger.debug(
                    "Ligand target atom {0}-{1} excluded. Carbon with Sp hybridized N".format(n['source', 'serial'],
                                                                                              n['source', 'name']))
                continue

            # Check Heme-Nitrogen coordination (Type II binding).
            # NOTE(review): 'N.ar' is not part of the atom types accepted by
            # the enclosing condition, so only 'C.ar' reaches this check.
            if source_atom_type in ('C.ar', 'N.ar'):
                ar_norm_angle = None
                for ring in ring_normals:
                    # Ring membership is stored as topology index (not serial)
                    if n['source', 'index'] in ring[-1]:
                        ar_norm_angle = ring[2]
                        break
                # Explicit None test: an angle of 0.0 is a valid (excluded) value
                if ar_norm_angle is not None and not (45 < ar_norm_angle < 85 or 95 < ar_norm_angle < 135):
                    logger.debug(
                        "Ligand target atom {0}-{1} excluded. Aromatic C or N part of ring with angle of {2:.2f} with respect to Heme plane".format(
                            n['source', 'serial'], n['source', 'name'], ar_norm_angle))
                    continue

        fe_ox_angle = angle(fe_coor, dummyox, z[0])
        dist = distance(dummyox, z)
        if min_heme_coor_angle < abs(fe_ox_angle) < max_heme_coor_angle and heme_dist_min < dist < heme_dist_max:
            contact_frame.loc[idx, 'contact'] = set_contact_type(contact_frame.loc[idx, 'contact'], 'hm')
            contact_frame.loc[idx, ('target', 'angle')] = fe_ox_angle
            logger.info(
                "Heme Fe possible som with {0} {1}. Distance: {2:.3f} A. FE-O-X angle: {3:.3f}".format(
                    n['source', 'serial'], n['source', 'name'], dist, fe_ox_angle))
        else:
            logger.debug(
                "Ligand target atom {0}-{1} excluded. Angle ({2:.3f}) or distance ({3:.3f}) criteria violated".format(
                    n['source', 'serial'], n['source', 'name'], fe_ox_angle, dist))

    return contact_frame
Example #5
0
def _donor_hydrogen_thetas(topology, donor, donor_coord, water_coord, min_theta_angle):
    """Yield (hydrogen coord, theta) for every covalently bound donor hydrogen passing the theta criterion."""

    covalent_neighbours = donor.neighbours(covalent=True)
    hydrogens = covalent_neighbours[covalent_neighbours['attype'] == 'H']
    for _, h in hydrogens.iterrows():
        h_coord = topology[topology['serial'] == h['serial']].coord

        # Check theta angle: donor - hydrogen - water oxygen
        theta = angle(donor_coord, h_coord, water_coord)
        if abs(theta) > min_theta_angle:
            yield h_coord, theta


def _accept_water_bridge(donor, donor_row, acceptor_row, donor_coord, hydrogen_coord, acceptor_coord, water,
                         water_coord, theta, min_omega_angle, max_omega_angle):
    """Apply the omega criterion; log and return the bridge tuple, or None if the criterion fails."""

    # Check omega angle: acceptor - water oxygen - donor h
    omega = 180 - angle(acceptor_coord, water_coord, hydrogen_coord)
    if not min_omega_angle < abs(omega) < max_omega_angle:
        return None

    dist_aw = distance(water_coord, acceptor_coord)
    dist_wd = distance(water_coord, donor_coord)

    # acceptor_row is a pandas Series: its `.name` attribute is the row
    # label, so the atom name is fetched by item access.
    logger.info(
        "Water bridge: donor {0}-{1}-{2}, acceptor {3}-{4}-{5}, water {6}. "
        "Dist d-w {7:.3f} a-w {8:.3f}. Omega: {9:.2f} Theta {10:.2f}"
        .format(donor.resName.values[0],
                donor.resSeq.values[0],
                donor.name.values[0], acceptor_row['resName'],
                acceptor_row['resSeq'], acceptor_row['name'],
                water.resSeq.values[0], dist_wd,
                dist_aw, omega, theta))

    return (omega, theta, dist_aw, dist_wd, donor_row['serial'], acceptor_row['serial'], water.serial.values[0])


def _report_water_bridge(contacts, topology, bridge, ligand_serial, partner_serial, label):
    """Label the ligand-water contact and append a partner-water contact row for one bridge (in place)."""

    cid = contacts[(contacts['source', 'serial'] == ligand_serial)
                   & (contacts['target', 'serial'] == bridge[6])]
    tid = topology[topology['serial'] == partner_serial]
    if cid.empty or tid.empty:
        # Without the ligand-water contact (or the partner atom) there is no
        # row to label nor data to build the partner-water row from.
        logger.warning('Unable to report water bridge {0}: no contact between atom {1} and water {2}'.format(
            label, ligand_serial, bridge[6]))
        return

    contacts.loc[cid.index, 'contact'] = set_contact_type(
        contacts.loc[cid.index, 'contact'], label)
    contacts.loc[cid.index, ('target', 'angle')] = bridge[1]

    # Append a new row: source is the bridged partner atom, target the water
    newindex = contacts.index.max() + 1
    for mdx in [col for col in contacts['target'].columns if not col == 'index']:
        contacts.loc[newindex, ('target', mdx)] = cid['target', mdx].values[0]
    for mdx in [col for col in contacts['source'].columns if not col == 'index']:
        contacts.loc[newindex, ('source', mdx)] = tid[mdx].values[0]

    contacts.loc[newindex, 'contact'] = label
    # NOTE(review): the acceptor-water distance is stored for both bridge
    # types, also when the new row is a donor-water pair - confirm.
    contacts.loc[newindex, ('target', 'distance')] = bridge[2]
    contacts.loc[newindex, ('target', 'angle')] = bridge[0]


def eval_water_bridges(contacts,
                       topology,
                       min_wbridge_dist=0.25,
                       max_wbridge_dist=0.40,
                       min_omega_angle=75.0,
                       max_omega_angle=140.0,
                       min_theta_angle=100.0,
                       wbfilter=True):
    """
    Evaluate the presence of water mediated hydrogen bonded bridges
    in the provided contact DataFrame.

    Algorithm:
    1) Select all water oxygen atoms within the range defined by
       min_wbridge_dist (Jiang et al., 2005) - 0.01 nm and max_wbridge_dist
       (Jiang et al., 2005) + 0.04 nm
    2) For each water, get neighbouring atoms below max_wbridge_dist excluding
       other waters.
    3) In two loops look for ligand donor - water - other acceptor pairs and
       ligand acceptor - water - other donor pairs.
    4) For each pair check if there is at least one covalently bound hydrogen
       attached to the donor.
    5) Check the theta angle (water O - donor H - donor), should be larger than
       min_theta_angle (Jiang et al., 2005).
    6) Check the omega angle (acceptor - water O - donor H), should be in the
       range defined by min_omega_angle, max_omega_angle (Jiang et al., 2005).
    7) If wbfilter option is True the number of bridges per water is limited:
       only one ligand donor - water - acceptor bridge and two ligand
       acceptor - water - donor bridges are kept, preferring an omega angle
       closest to 110 deg. and/or smaller H-bond distances.

    :param contacts:           contact DataFrame
    :type contacts:            :py:DataFrame
    :param topology:           Pandas DataFrame representing the structure
    :type topology:            :interact:TopologyDataFrame
    :param min_wbridge_dist:   Minimal distance for water bridged hydrogen bonds
    :type min_wbridge_dist:    :py:float
    :param max_wbridge_dist:   Maximum distance for water bridged hydrogen bonds
    :type max_wbridge_dist:    :py:float
    :param min_omega_angle:    Minimum omega angle, acceptor-waterO-donorH
    :type min_omega_angle:     :py:float
    :param max_omega_angle:    Maximum omega angle, acceptor-waterO-donorH
    :type max_omega_angle:     :py:float
    :param min_theta_angle:    Minimum theta angle, waterO-donorH-donor
    :type min_theta_angle:     :py:float
    :param wbfilter:           Limit the number of bridges per water (step 7)
    :type wbfilter:            :py:bool

    :return:                   Adds the label 'wb-da' (ligand donor - water -
                               acceptor) or 'wb-ad' (ligand acceptor - water -
                               donor) to the 'contact' column of the
                               ligand-water contact and stores the theta angle.
                               For every bridge a new row is appended for the
                               bridged atom - water pair holding the omega
                               angle.
    :rtype:                    :pandas:DataFrame

    """

    # Preselect all water oxygen's close to ligand
    wbdist = contacts[(contacts['target', 'distance'] > min_wbridge_dist)
                      & (contacts['target', 'distance'] <= max_wbridge_dist) &
                      (contacts['target', 'resName'] == 'HOH') &
                      (contacts['target', 'attype'] == 'O.3')]

    if wbdist.empty:
        logger.debug('No water oxygen atoms detected close to the ligand')
        return contacts

    logger.info(
        "Run water bridge detection on {0} possible contacts using: min_wbridge_dist={1},"
        "max_wbridge_dist={2}, min_omega_angle={3}, max_omega_angle={4}, min_theta_angle={5},"
        "wbfilter={6}".format(len(wbdist), min_wbridge_dist, max_wbridge_dist,
                              min_omega_angle, max_omega_angle,
                              min_theta_angle, wbfilter))

    # Query for potential hbond donor-acceptor pairs
    accpt_attypes = ('N.3', 'N.2', 'N.1', 'N.acid', 'N.ar', 'O.3', 'O.co2',
                     'O.2', 'S.m', 'S.a', 'F', 'Br', 'Cl')
    donor_attypes = ('N.3', 'N.2', 'N.acid', 'N.am', 'N.4', 'N.pl3', 'N.plc',
                     'O.3')

    def preference(bridge):
        # Smaller is better: omega deviation from 110 deg. plus both distances
        return abs(110 - bridge[0]) + (bridge[2] + bridge[3])

    # Loop over waters looking for water bridges
    ligresnum = wbdist['source', 'resSeq'].unique()
    for water_serial in sorted(wbdist['target', 'serial'].unique()):

        # Get neighbouring atoms
        water = topology.loc[(topology['serial'] == water_serial)]
        water_neigh = water.neighbours(cutoff=max_wbridge_dist)
        w = water.coord

        # Remove other waters
        if not water_neigh.empty:
            water_neigh = water_neigh[(water_neigh['resName'] != 'HOH')]

        # Split the neighbours in ligand / non-ligand donors and acceptors
        is_ligand = water_neigh['resSeq'].isin(ligresnum)
        is_donor = water_neigh['attype'].isin(donor_attypes)
        is_acceptor = water_neigh['attype'].isin(accpt_attypes)
        ligand_donors = water_neigh[is_ligand & is_donor]
        other_acceptors = water_neigh[~is_ligand & is_acceptor]
        ligand_acceptors = water_neigh[is_ligand & is_acceptor]
        other_donors = water_neigh[~is_ligand & is_donor]

        # Query possible ligand donor - water - acceptor contacts
        dwa_pairs = []
        for _, d in ligand_donors.iterrows():
            donor = topology[topology['serial'] == d['serial']]
            x = donor.coord

            # Check if there are H-atoms attached and asses H-bond geometry criteria
            for y, theta in _donor_hydrogen_thetas(topology, donor, x, w, min_theta_angle):

                # Loop over possible acceptors
                for _, a in other_acceptors.iterrows():
                    acceptor = topology[topology['serial'] == a['serial']].coord
                    bridge = _accept_water_bridge(donor, d, a, x, y, acceptor, water, w, theta,
                                                  min_omega_angle, max_omega_angle)
                    if bridge is not None:
                        dwa_pairs.append(bridge)

        if wbfilter and len(dwa_pairs) > 1:
            dwa_pairs.sort(key=preference)
            dwa_pairs = [dwa_pairs[0]]

        # Ligand atom is the donor (bridge[4]), bridged partner the acceptor (bridge[5])
        for bridge in dwa_pairs:
            _report_water_bridge(contacts, topology, bridge, bridge[4], bridge[5], 'wb-da')

        # Query possible ligand acceptor - water - donor contacts
        awd_pairs = []
        for _, a in ligand_acceptors.iterrows():
            acceptor = topology[topology['serial'] == a['serial']].coord

            # Loop over possible donors
            for _, d in other_donors.iterrows():
                donor = topology[topology['serial'] == d['serial']]
                x = donor.coord

                # Check if there are H-atoms attached and asses H-bond geometry criteria
                for y, theta in _donor_hydrogen_thetas(topology, donor, x, w, min_theta_angle):
                    bridge = _accept_water_bridge(donor, d, a, x, y, acceptor, water, w, theta,
                                                  min_omega_angle, max_omega_angle)
                    if bridge is not None:
                        awd_pairs.append(bridge)

        if wbfilter and len(awd_pairs) > 2:
            awd_pairs.sort(key=preference)
            awd_pairs = awd_pairs[:2]

        # Ligand atom is the acceptor (bridge[5]), bridged partner the donor (bridge[4])
        for bridge in awd_pairs:
            _report_water_bridge(contacts, topology, bridge, bridge[5], bridge[4], 'wb-ad')

    # TODO: Reset dtype on atnum and resnum to int64 again. They get changed to float64 somehow.
    for n in ('source', 'target'):
        contacts[(n, 'serial')] = contacts[(n, 'serial')].astype('int64')
        contacts[(n, 'resSeq')] = contacts[(n, 'resSeq')].astype('int64')

    return contacts
Example #6
0
def eval_hbonds(contacts,
                topology,
                max_hbond_dist=0.41,
                hbond_don_anglediv=50.0,
                hbond_acc_anglediv=90.0,
                optimize=True):
    """
    Evaluate the presence of hydrogen bonded contacts in the provided
    contact DataFrame. This function does not evaluate water bridges.

    Prerequisites:
    This function uses the SYBYL atom types to identify possible
    hydrogen bond donor and acceptor atoms. At least one covalently
    bonded hydrogen is expected for donors and subsequently the
    possible bonding geometry in terms of distances and angles is
    evaluated.

    As a result the function requires the input structure to be fully
    protonated or to have at least polar hydrogens attached.
    The geometry of the attached hydrogens influences the contacts
    identified. If the structure is not, or only partially, protonated,
    the method used to add hydrogens will influence the identified
    contacts. Structures derived from molecular dynamics will likely have
    their (polar) hydrogens oriented such that they reflect a hydrogen
    bond if present. If hydrogens are added with another program this may
    not be the case. OpenBabel for instance will add hydrogens in standard
    conformation not taking into account the environment of the atom
    to which hydrogens are attached. The HBplus program (McDonald I K &
    Thornton J M (1994). J. Mol. Biol., 238, 777-793.) also part of
    the LIGPLOT program, will optimize local hydrogen geometry first.

    Differences in the geometry of added hydrogens will mostly affect
    the angle criteria rather than the distance. To correct for
    non-optimized H-atom geometry without the need for optimization,
    the function allows the donor angle criterion to be a function of
    the number of attached atoms using the 'optimize' option. The
    allowed deviation is then defined as:

    180 / number of covalent donor neighbours that themselves have more
          than one covalent neighbour

    (0.0, i.e. no hydrogen bond possible, if there are no such
    neighbours) for all donors that are not trigonal planar (N.pl3,
    N.plc, N.ar, N.2, O.2, O.co2, S.a)

    Algorithm:
    1) Select all heavy atom contacts within max_hbond_dist.
    2) Identify donor-acceptor pairs for source to target and target to
       source based on SYBYL atom types (see below).
    3) Check if donor has at least one covalently bonded H-atom
    4) Check if angle between donor - H - acceptor does not deviate more
       than hbond_don_anglediv from its ideal in-plane (180) degree
       orientation (cone fit), (Hubbard & Haider, 2001). The value for
       hbond_don_anglediv is either fixed or a function of the number
       of covalently attached atoms (see above) when 'optimize' is True.
    5) Check if the angle between the heavy atom acceptor neighbour -
       acceptor - H does not deviate more than hbond_acc_anglediv.
    6) Check that the donor-acceptor heavy atom distance is larger than
       the H-acceptor distance.

    atom descriptor       base type   donor1  acceptor    directionality
    --------------------------------------------------------------------
    sp3 N                  N.3        y       y           along lone pair
    sp2 N                  N.2        y       y           along lone pair
    sp  N                  N.1        n       y           along lone pair
    Acidic N               N.acid     y       y           along lone pair 2
    Aromatic N             N.ar       y       y           along lone pair
    Amide N                N.am       y       n
    Quaternary N           N.4        y       n
    Uncharged trigonal N   N.pl3      y       n           3
    Charged trigonal N     N.plc      y       n           4
    Hydroxyl O             O.3        y       y           in plane of lone pair
    Ether O                O.3        n       y           in plane of lone pair
    Carboxylate O          O.co2      n       y           along lone pair
    Carbonyl O             O.2        n       y           in plane of lone pair
    Nitro O                O.2        n       y           along lone pair
    N-oxide O              O.2        n       y
    Amide O                O.2        n       y           in plane of lone pair
    Neutral sulfur-bound O O.2        n       y           5
    Charged sulfur-bound O O.co2      n       y           cone 6
    Phosphate O            O.co2      n       y           cone
    Borate O               O.co2      n       y           cone
    Other neg-charged O    O.co2      n       y
    Negative charged S     S.m        n       y           along lone pair
    sp2 S                  S.a        n       y           along lone pair

    1: Provided at least one H-atom covalently bound
    2: An acidic nitrogen is a nitrogen bound by at least two single bonds
    3: As in uncharged histidine residue
    4: As in a guanidino residue
    5: As in sulfonamides, sulfoxides, sulfones
    6: As in sulphate groups

    :param contacts:           contact DataFrame
    :type contacts:            :py:DataFrame
    :param topology:           Pandas DataFrame representing the structure
    :type topology:            :interact:ToplogyDataFrame
    :param max_hbond_dist:     Maximum hydrogen bond distance cutoff
    :type max_hbond_dist:      :py:float
    :param hbond_don_anglediv: Maximum hydrogen bond donor-H-acceptor
                               angle deviation.
    :type hbond_don_anglediv:  :py:float
    :param hbond_acc_anglediv: Maximum hydrogen bond acceptor'-acceptor-H
                               angle deviation.
    :type hbond_acc_anglediv:  :py:float
    :param optimize:           Whether to optimize the donor angle cutoff
                               based on donor atom geometry.
    :type optimize:            :py:bool

    :return:                   The input contacts DataFrame, modified in
                               place: the 'contact' label is changed to
                               hb-ad (hydrogen bond acceptor-donor) or hb-da
                               (hydrogen bond donor-acceptor) for identified
                               hydrogen-bonded contacts and the value of
                               the donor-H-acceptor angle is stored in the
                               ('target', 'angle') column.
    :rtype:                    :pandas:DataFrame
    """

    # Preselect all contacts below max_hbond_dist
    hbdist = contacts[(contacts['target', 'distance'] <= max_hbond_dist)]

    logger.info(
        "Init eval_hbonds with {0} contacts using: max_hbond_dist={1}".format(
            len(hbdist), max_hbond_dist))

    # Query for potential hbond donor-acceptor pairs
    accpt_attypes = ('N.3', 'N.2', 'N.1', 'N.acid', 'N.ar', 'O.3', 'O.co2',
                     'O.2', 'S.m', 'S.a')
    donor_attypes = ('N.3', 'N.2', 'N.acid', 'N.am', 'N.ar', 'N.4', 'N.pl3',
                     'N.plc', 'O.3')
    # Trigonal planar types: their angle cutoff is never 'optimized'
    donor_avoid = ('N.pl3', 'N.plc', 'N.ar', 'N.2', 'O.2', 'O.co2', 'S.a')

    # Define donor_acceptor pairs. Source donor - target acceptor and vice versa
    donor_acceptor_dict = dict()
    donor_acceptor_dict['source'] = hbdist[
        (hbdist['source', 'attype'].isin(donor_attypes))
        & (hbdist['target', 'attype'].isin(accpt_attypes))]
    donor_acceptor_dict['target'] = hbdist[
        (hbdist['source', 'attype'].isin(accpt_attypes))
        & (hbdist['target', 'attype'].isin(donor_attypes))]

    logger.info("{0} contacts after selecting for donor-acceptor pairs".format(
        len(donor_acceptor_dict['source'])))
    logger.info("{0} contacts after selecting for acceptor-donor pairs".format(
        len(donor_acceptor_dict['target'])))

    # Search for hbonds. 'direction' is the side of the contact holding the
    # donor, 'target' the opposite side holding the acceptor.
    label_dict = {'source': 'hb-da', 'target': 'hb-ad'}
    for direction, selection in donor_acceptor_dict.items():

        target = 'target' if direction == 'source' else 'source'

        for idx, n in selection.iterrows():
            donor = topology[topology['serial'] == n[direction, 'serial']]
            acceptor = topology[topology['serial'] == n[target, 'serial']]

            donor_bonded = donor.neighbours(covalent=True)
            acceptor_bonded = acceptor.neighbours(covalent=True)
            # Only heavy-atom acceptor neighbours within the acceptor residue
            acceptor_bonded = acceptor_bonded[
                (acceptor_bonded['attype'] != 'H')
                & (acceptor_bonded['resSeq'] == n[target, 'resSeq'])]

            # There should at least be covalent neighbours (e.g. not ions etc.)
            if donor_bonded.empty or acceptor_bonded.empty:
                logger.debug(
                    'No neighbours in contact pair {0}-{1}, skipping'.format(
                        n[direction, 'serial'], n[target, 'serial']))
                continue

            # Hydrogens covalently attached to the donor in the same residue.
            # Without any there is nothing to evaluate for this contact.
            donor_hydrogens = donor_bonded[
                (donor_bonded['attype'] == 'H')
                & (donor_bonded['resSeq'] == n[direction, 'resSeq'])]
            if donor_hydrogens.empty:
                continue

            # Get donor and acceptor heavy-atom coordinates
            donor_heavy_coor = donor.coord
            acceptor_heavy_coor = acceptor.coord

            # Coordinates of the acceptor heavy-atom neighbours. These do not
            # depend on the hydrogen evaluated, so look them up once.
            acceptor_neigh_coors = [
                topology[topology['serial'] == serial].coord
                for serial in acceptor_bonded['serial']
            ]

            # If optimize equals True, determine donor-H-acceptor angle
            # deviation based on covalent bonding geometry for all non
            # trigonal planar donor atoms. The result is the same for every
            # hydrogen on this donor, so compute it once per contact.
            don_anglediv = hbond_don_anglediv
            if optimize and donor['attype'].values[0] not in donor_avoid:
                substitutions = 0
                for _, bonded in donor_bonded.iterrows():
                    bonded_neigh = topology[
                        topology['serial'] == bonded['serial']].neighbours(
                            covalent=True)
                    if len(bonded_neigh) > 1:
                        substitutions += 1
                # No substituted neighbours: zero tolerance, which makes the
                # angle criterion below impossible to satisfy.
                if substitutions:
                    don_anglediv = 180 / float(substitutions)
                else:
                    don_anglediv = 0.0

            # Assess H-bond geometry criteria for every attached H-atom
            for _, h in donor_hydrogens.iterrows():

                # Get donor hydrogen coordinates
                donor_h_coor = h.coord

                # Angle donor - H - acceptor
                angle1 = angle(donor_heavy_coor, donor_h_coor,
                               acceptor_heavy_coor)
                dist1 = distance(donor_h_coor, acceptor_heavy_coor)

                # Angle acceptor_neigh - acceptor - H, keep largest.
                angle2 = max(
                    angle(neigh_coor, acceptor_heavy_coor, donor_h_coor)
                    for neigh_coor in acceptor_neigh_coors)

                if ((180 - don_anglediv < abs(angle1) < 180 + don_anglediv)
                        and (180 - hbond_acc_anglediv < abs(angle2)
                             < 180 + hbond_acc_anglediv)
                        and (contacts.loc[idx, 'target'].distance > dist1)):

                    contacts.loc[idx, 'contact'] = set_contact_type(
                        contacts.loc[idx, 'contact'], label_dict[direction])
                    contacts.loc[idx, ('target', 'angle')] = angle1

                    source_atom = contacts.loc[idx, 'source']
                    target_atom = contacts.loc[idx, 'target']
                    # NOTE: attribute access '.name' on a pandas Series gives
                    # the Series' own label (the row index), not the value
                    # stored under the key 'name'; fetch the atom name by key.
                    logger.info(
                        "H-bond between {0}-{1} {2}-{3} and {4}-{5} {6}-{7}. Distance D-A: {8:.3f}, "
                        "Distance DH-A: {9:.3f}, angle: {10:.2f} deg. hbond_don_anglediv: {11:.2f}"
                        .format(source_atom.resSeq,
                                source_atom.resName,
                                source_atom.serial,
                                source_atom.get('name'),
                                target_atom.resSeq,
                                target_atom.resName,
                                target_atom.serial,
                                target_atom.get('name'),
                                target_atom.distance, dist1,
                                angle1, don_anglediv))

    return contacts