Ejemplo n.º 1
0
def test_canonize_ring_path():
    """Check that rotated and reversed ring paths canonize to one order."""
    reference = list(range(6))

    # Begin with the reference ring shifted by three positions.
    ring = deque(reference)
    ring.rotate(3)
    canonical = canonize_ring_path(ring)
    assert canonical == reference

    # Flip the very same deque in place and canonize it once more.
    ring.reverse()
    canonical = canonize_ring_path(ring)
    assert canonical == reference

    # A tuple is not an accepted container and must raise ValueError.
    with pytest.raises(ValueError):
        canonize_ring_path(tuple(range(6)))
Ejemplo n.º 2
0
def test_canonize_ring_path():
    """Verify ring-path canonization for a shifted and a flipped path."""
    expected = list(range(6))
    shifted = deque(expected)
    shifted.rotate(3)

    # Shifted ring path.
    result = canonize_ring_path(shifted)
    assert result == expected

    # Same deque object, now reversed in place.
    shifted.reverse()
    result = canonize_ring_path(shifted)
    assert result == expected

    # Passing a tuple is expected to fail with ValueError.
    unsupported = tuple(range(6))
    with pytest.raises(ValueError):
        canonize_ring_path(unsupported)
Ejemplo n.º 3
0
Archivo: ob.py Proyecto: joskid/oddt
    def _dicts(self):
        """Build the structured arrays describing atoms, rings and residues.

        Fills one record per atom (coordinates, charges, atom typing,
        neighbour data, residue membership and boolean property flags), one
        record per aromatic ring in ``self.sssr`` (centroid and a vector
        perpendicular to the ring) and, when ``self.protein`` is true, one
        record per residue for which all four of the N, CA, C and O backbone
        entries were collected.

        The results are stored on the instance as ``_atom_dict``,
        ``_ring_dict`` and (proteins only) ``_res_dict``; each array is made
        read-only. Nothing is returned.

        A ``UserWarning`` is emitted for every atom that has more than
        ``max_neighbors`` neighbours; the additional neighbours are ignored.
        """
        max_neighbors = 6  # max of 6 neighbors should be enough
        # Atoms
        # NOTE: the field order below must match, position by position, the
        # tuple assigned to `atom_dict[i]` further down.
        atom_dtype = [
            ('id', np.uint32),
            # atom info
            ('coords', np.float32, 3),
            ('radius', np.float32),
            ('charge', np.float32),
            ('atomicnum', np.int8),
            ('atomtype', 'U5' if PY3 else 'a5'),
            ('hybridization', np.int8),
            ('numhs', np.uint8),
            ('formalcharge', np.int8),
            ('neighbors_id', np.int16, max_neighbors),
            ('neighbors', np.float32, (max_neighbors, 3)),
            # residue info
            ('resid', np.int16),
            ('resnum', np.int16),
            ('resname', 'U3' if PY3 else 'a3'),
            ('isbackbone', bool),
            # atom properties
            ('isacceptor', bool),
            ('isdonor', bool),
            ('isdonorh', bool),
            ('ismetal', bool),
            ('ishydrophobe', bool),
            ('isaromatic', bool),
            ('isminus', bool),
            ('isplus', bool),
            ('ishalogen', bool),
            # secondary structure
            ('isalpha', bool),
            ('isbeta', bool)
        ]

        # Uninitialised storage; every record is fully assigned in the loop
        # below.
        atom_dict = np.empty(self.OBMol.NumAtoms(), dtype=atom_dtype)
        # Atomic numbers that set the 'ismetal' flag.
        metals = [
            3, 4, 11, 12, 13, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
            31, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 55, 56,
            57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73,
            74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 87, 88, 89, 90, 91, 92, 93,
            94, 95, 96, 97, 98, 99, 100, 101, 102, 103
        ]
        for i, atom in enumerate(self.atoms):

            atomicnum = atom.atomicnum
            # Disabled optimisation, kept for reference:
            # skip non-polar hydrogens for performance
            #     if atomicnum == 1 and atom.OBAtom.IsNonPolarHydrogen():
            #         continue
            atomtype = typetable.Translate(atom.type)  # sybyl atom type
            partialcharge = atom.partialcharge
            coords = atom.coords

            # Residue data is only looked up for proteins; `False` makes the
            # `... if residue else ...` fallbacks below pick the defaults.
            if self.protein:
                residue = Residue(atom.OBAtom.GetResidue())
            else:
                residue = False

            # Scratch record array with one slot per possible neighbour.
            # Slots that are never written keep id 0, atomicnum 0 and NaN
            # coordinates.
            neighbors = np.zeros(max_neighbors,
                                 dtype=[('id', np.int16),
                                        ('coords', np.float32, 3),
                                        ('atomicnum', np.int8)])
            neighbors['coords'].fill(np.nan)
            for n, nbr_atom in enumerate(atom.neighbors):
                # `n` counts every neighbour, hydrogens included, so the
                # limit applies to the total number of neighbours.
                if n >= max_neighbors:
                    warnings.warn(
                        'Error while parsing molecule "%s" '
                        'for `atom_dict`. Atom #%i (%s) has %i '
                        'neighbors (max_neighbors=%i). Additional '
                        'neighbors are ignored.' %
                        (self.title, atom.idx0, atomtype, len(
                            atom.neighbors), max_neighbors), UserWarning)
                    break
                # Hydrogen neighbours are not stored; their slot `n` stays at
                # the defaults, so filled slots need not be contiguous.
                if nbr_atom.atomicnum == 1:
                    continue
                neighbors[n] = (nbr_atom.idx0, nbr_atom.coords,
                                nbr_atom.atomicnum)
            # The record position must coincide with the atom's own index.
            assert i == atom.idx0
            atom_dict[i] = (
                i,
                coords,
                ob.GetVdwRad(atomicnum),
                partialcharge,
                atomicnum,
                atomtype,
                atom.OBAtom.GetHyb(),
                # stored as 'numhs': total degree minus heavy-atom degree
                atom.OBAtom.GetTotalDegree() - atom.OBAtom.GetHvyDegree(),
                atom.formalcharge,
                neighbors['id'],
                neighbors['coords'],
                # residue info
                residue.idx0 if residue else 0,
                residue.number if residue else 0,
                residue.name if residue else '',
                residue.OBResidue.GetAtomProperty(atom.OBAtom, 2)
                if residue else False,  # is backbone
                # atom properties
                False,  # isacceptor: set below from the acceptor SMARTS
                False,  # isdonor: set below from the donor SMARTS
                False,  # isdonorh: set below from hydrogens on donors
                atomicnum in metals,
                # carbon whose stored neighbour atomic numbers are all 6, 1
                # or 0 (0 being the value of empty/skipped slots)
                atomicnum == 6 and np.in1d(neighbors['atomicnum'],
                                           [6, 1, 0]).all(),  # hydrophobe
                atom.OBAtom.IsAromatic(),
                atom.formalcharge < 0,  # is charged (minus)
                atom.formalcharge > 0,  # is charged (plus)
                atomicnum in [9, 17, 35, 53],  # is halogen?
                False,  # alpha: set below for proteins
                False  # beta: set below for proteins
            )

        # Positions of atoms that are neither hydrogen (1) nor carbon (6);
        # every SMARTS-derived flag below is restricted to these atoms.
        not_carbon = np.argwhere(
            ~np.in1d(atom_dict['atomicnum'], [1, 6])).flatten()
        # Acceptors
        patt = Smarts('[$([O;H1;v2]),'
                      '$([O;H0;v2;!$(O=N-*),'
                      '$([O;-;!$(*-N=O)]),'
                      '$([o;+0])]),'
                      '$([n;+0;!X3;!$([n;H1](cc)cc),'
                      '$([$([N;H0]#[C&v4])]),'
                      '$([N&v3;H0;$(Nc)])]),'
                      '$([F;$(F-[#6]);!$(FC[F,Cl,Br,I])])]')
        # NOTE(review): the `- 1` applied to `matches` below suggests that
        # findall() yields 1-based atom indices - confirm against Smarts.
        matches = np.array(patt.findall(self)).flatten()
        if len(matches) > 0:
            atom_dict['isacceptor'][np.intersect1d(matches - 1,
                                                   not_carbon)] = True

        # Donors
        patt = Smarts(
            '[$([N&!H0&v3,N&!H0&+1&v4,n&H1&+0,$([$([Nv3](-C)(-C)-C)]),'
            '$([$(n[n;H1]),'
            '$(nc[n;H1])])]),'
            # Guanidine can be tautomeric - e.g. Arginine
            '$([NX3,NX2]([!O,!S])!@C(!@[NX3,NX2]([!O,!S]))!@[NX3,NX2]([!O,!S])),'
            '$([O,S;H1;+0])]')
        matches = np.array(patt.findall(self)).flatten()
        if len(matches) > 0:
            atom_dict['isdonor'][np.intersect1d(matches - 1,
                                                not_carbon)] = True
            # Flag every hydrogen neighbour of an atom marked as donor.
            atom_dict['isdonorh'][[
                n.idx0 for idx in np.argwhere(atom_dict['isdonor']).flatten()
                for n in self.atoms[int(idx)].neighbors if n.atomicnum == 1
            ]] = True

        # Basic group
        # Adds to the 'isplus' flags already set from positive formal charge.
        patt = Smarts(
            '[$([N;H2&+0][$([C,a]);!$([C,a](=O))]),'
            '$([N;H1&+0]([$([C,a]);!$([C,a](=O))])[$([C,a]);!$([C,a](=O))]),'
            '$([N;H0&+0]([C;!$(C(=O))])([C;!$(C(=O))])[C;!$(C(=O))]),'
            '$([N,n;X2;+0])]')
        matches = np.array(patt.findall(self)).flatten()
        if len(matches) > 0:
            atom_dict['isplus'][np.intersect1d(matches - 1, not_carbon)] = True

        # Acidic group
        # Adds to the 'isminus' flags already set from negative formal
        # charge; the matched carbon is dropped by the `not_carbon` filter.
        patt = Smarts('[CX3](=O)[OX1H0-,OX2H1]')
        matches = np.array(patt.findall(self)).flatten()
        if len(matches) > 0:
            atom_dict['isminus'][np.intersect1d(matches - 1,
                                                not_carbon)] = True

        if self.protein:
            # Protein Residues (alpha helix and beta sheet)
            res_dtype = [('id', np.int16), ('resnum', np.int16),
                         ('resname', 'U3' if PY3 else 'a3'),
                         ('N', np.float32, 3), ('CA', np.float32, 3),
                         ('C', np.float32, 3), ('O', np.float32, 3),
                         ('isalpha', bool), ('isbeta', bool)]  # N, CA, C, O

            b = []
            for residue in self.residues:
                # Coordinates of the backbone atoms found in this residue,
                # keyed by 'N', 'CA', 'C' and 'O'.
                backbone = {}
                for atom in residue:
                    # NOTE(review): property 1 is presumably OpenBabel's
                    # amino-acid backbone flag (the per-atom 'isbackbone'
                    # field above uses property 2) - confirm.
                    if residue.OBResidue.GetAtomProperty(atom.OBAtom, 1):
                        if atom.atomicnum == 7:
                            backbone['N'] = atom.coords
                        elif atom.atomicnum == 6:
                            # A carbon of type 'C3' is taken as CA, any
                            # other backbone carbon as C.
                            if atom.type == 'C3':
                                backbone['CA'] = atom.coords
                            else:
                                backbone['C'] = atom.coords
                        elif atom.atomicnum == 8:
                            backbone['O'] = atom.coords
                # Only residues with all four backbone entries are recorded.
                if len(backbone.keys()) == 4:
                    b.append((residue.idx0, residue.number, residue.name,
                              backbone['N'], backbone['CA'], backbone['C'],
                              backbone['O'], False, False))
            res_dict = np.array(b, dtype=res_dtype)
            # Presumably returns the array with 'isalpha'/'isbeta' filled in
            # (both fields are read right below) - verify against the helper.
            res_dict = detect_secondary_structure(res_dict)
            # Copy the residue-level flags to every atom of those residues.
            alpha_mask = np.in1d(atom_dict['resid'],
                                 res_dict[res_dict['isalpha']]['id'])
            atom_dict['isalpha'][alpha_mask] = True
            beta_mask = np.in1d(atom_dict['resid'],
                                res_dict[res_dict['isbeta']]['id'])
            atom_dict['isbeta'][beta_mask] = True

        # Aromatic Rings
        r = []
        for ring in self.sssr:
            if ring.IsAromatic():
                path = [x - 1 for x in ring._path]  # NOTE: mol.sssr is 1-based
                atoms = atom_dict[canonize_ring_path(path)]
                if len(atoms):
                    # Residue and secondary-structure info of the ring is
                    # taken from the first atom of the canonized path.
                    atom = atoms[0]
                    coords = atoms['coords']
                    centroid = coords.mean(axis=0)
                    # get vector perpendicular to ring: mean of the cross
                    # products of each centroid-to-atom vector with the next
                    # one along the path
                    ring_vectors = coords - centroid
                    vector = np.cross(ring_vectors,
                                      np.roll(ring_vectors, shift=-1,
                                              axis=0)).mean(axis=0)
                    r.append(
                        (centroid, vector, atom['resid'], atom['resnum'],
                         atom['resname'], atom['isalpha'], atom['isbeta']))
        ring_dict = np.array(r,
                             dtype=[('centroid', np.float32, 3),
                                    ('vector', np.float32, 3),
                                    ('resid', np.int16), ('resnum', np.int16),
                                    ('resname', 'U3' if PY3 else 'a3'),
                                    ('isalpha', bool), ('isbeta', bool)])

        # Store the results on the instance and mark each array read-only.
        self._atom_dict = atom_dict
        self._atom_dict.setflags(write=False)
        self._ring_dict = ring_dict
        self._ring_dict.setflags(write=False)
        if self.protein:
            self._res_dict = res_dict
            self._res_dict.setflags(write=False)