Beispiel #1
0
def phar_from_mol(ligand):
    """Build a Pharmacophore from an RDKit molecule.

    Every pattern in PATTERNS is matched against the molecule; the first atom
    of each match becomes a node (atoms hit by several patterns get one node
    carrying all the corresponding types). For each pair of nodes the value
    returned by __count_bonds is stored as the edge length whenever it is
    greater than -1.

    Args:
      ligand (RDKit.Chem.Mol): molecule to convert

    Returns:
      Pharmacophore: model with molecules=1.0; its title is the molecule's
        "_Name" property when that property is set

    Raises:
      TypeError: if ligand is not an RDKit.Chem.Mol object
    """
    if not isinstance(ligand, Chem.Mol):
        raise TypeError("Invalid ligand! Expected RDKit.Chem.Mol object, got "
                        "%s instead" % type(ligand).__name__)

    # pharmacophoric type -> first atom of every substructure match
    matches = {}
    for phar, pattern in PATTERNS.items():
        columns = list(zip(*ligand.GetSubstructMatches(pattern)))
        matches[phar] = list(columns[0]) if columns else []

    nodes = []
    points = {}  # atom id -> position of its node in `nodes`
    for phar, atoms in matches.items():
        for atom in atoms:
            if atom not in points:
                points[atom] = len(nodes)
                nodes.append({
                    "label": points[atom],
                    "type": {phar: 1.0},
                    "freq": 1.0
                })
            else:
                # atom already has a node: just add another type to it
                nodes[points[atom]]["type"][phar] = 1.0

    size = len(nodes)
    edges = np.zeros((size, size))

    keys = sorted(points.keys())
    for i, first in enumerate(keys):
        row = points[first]
        for j in range(i):
            second = keys[j]
            # all matched atoms except the two currently considered (j < i)
            others = keys[:j] + keys[j + 1:i] + keys[i + 1:]
            dist = float(__count_bonds(ligand, first, second, others))
            if dist > -1:
                col = points[second]
                edges[row, col] = dist
                edges[col, row] = dist

    extra = {"molecules": 1.0}
    if ligand.HasProp("_Name"):
        extra["title"] = ligand.GetProp("_Name")
    return Pharmacophore(nodes, edges, **extra)
Beispiel #2
0
def compare_nodes(n1, n2):
    """Score how alike two nodes are and merge their pharmacophoric types.

    For every type in PHARS present in both nodes, the summed weight of that
    type divided by the total weight of all types in both nodes is added to
    the score. The score is finally multiplied by the sum of both node
    frequencies. The merged dictionary holds the summed weight for shared
    types and the single weight for types present in only one node.

    Args:
       n1, n2 (dict): nodes to compare

    Returns:
      float: unnormalised similarity score
      dict: pharmacophoric properties for nodes combination

    Raises:
      TypeError: if n1 or n2 is not a dict
      ValueError: if n1 or n2 is rejected by Pharmacophore.check_node
    """
    if not isinstance(n1, dict):
        raise TypeError("Invalid n1! Expected dict, got %s instead" %
                        type(n1).__name__)
    if not isinstance(n2, dict):
        raise TypeError("Invalid n2! Expected dict, got %s instead" %
                        type(n2).__name__)

    if not Pharmacophore.check_node(n1):
        raise ValueError("Invalid n1!")

    if not Pharmacophore.check_node(n2):
        raise ValueError("Invalid n2!")

    types1 = n1["type"]
    types2 = n2["type"]
    total_freq = n1["freq"] + n2["freq"]
    total_weight = sum(types1.values()) + sum(types2.values())

    score = 0.0
    combined = {}
    for phar in PHARS:
        in_first = phar in types1
        in_second = phar in types2
        if in_first and in_second:
            shared = types1[phar] + types2[phar]
            score += shared / total_weight
            combined[phar] = shared
        elif in_first:
            combined[phar] = types1[phar]
        elif in_second:
            combined[phar] = types2[phar]

    return score * total_freq, combined
Beispiel #3
0
    def testSaveRead(self):
        """Check that save() followed by read() reproduces the pharmacophore.

        Also checks that reading a removed file and saving into a missing
        directory both raise IOError.
        """
        from decaf import Pharmacophore
        import os
        import shutil
        import tempfile

        # Work inside a private directory: nothing is left behind when an
        # assertion fails, and "nonexist" is guaranteed not to exist.
        workdir = tempfile.mkdtemp()
        filename = os.path.join(workdir, "test.p")
        try:
            self.phar.save(filename)
            p_copy = Pharmacophore.read(filename)

            self.assertEqual(self.phar.numnodes, p_copy.numnodes)
            self.assertEqual(self.phar.nodes, p_copy.nodes)
            for i in range(p_copy.numnodes):
                for j in range(p_copy.numnodes):
                    self.assertEqual(self.phar.edges[i, j],
                                     p_copy.edges[i, j])
            self.assertEqual(self.phar.title, p_copy.title)
            self.assertEqual(self.phar.molecules, p_copy.molecules)

            os.remove(filename)
            self.assertRaises(IOError, Pharmacophore.read, filename)
            self.assertRaises(IOError, Pharmacophore.save, p_copy,
                              os.path.join(workdir, "nonexist", "test.p"))
        finally:
            shutil.rmtree(workdir, ignore_errors=True)
Beispiel #4
0
Created on Mon Mar 16 10:11:53 2015
@author: Marta Stepniewska
"""

from pybel import readfile
from decaf import Pharmacophore
from decaf.toolkits.ob import phar_from_mol
from decaf.utils import similarity
from multiprocessing import Process, Manager, cpu_count
from time import sleep

# one slot per CPU reported by multiprocessing.cpu_count()
NUM_PROCESSES = cpu_count()
# molecules are kept by check_mol when their similarity score exceeds this
cutoff = 0.8

database = readfile("smi", "all.ism")
model = Pharmacophore.read("model.p")

# A single parenthesised argument prints the same text under Python 2 and
# also parses under Python 3.
print("Read model with %s nodes created from %s molecules." %
      (model.numnodes, model.molecules))

manager = Manager()
# manager-backed list that check_mol appends its hits to
similar = manager.list()
# NOTE(review): presumably filled with Process objects later - confirm
proc = [None] * NUM_PROCESSES


def check_mol(mol):
    """Record mol in `similar` when it is similar enough to the model.

    The molecule is converted with phar_from_mol and compared with the
    module-level `model`. When the first value returned by similarity() is
    greater than `cutoff`, a tuple of (mol.write(), first value, second
    value) is appended to the shared `similar` list.
    """
    score, second = similarity(model, phar_from_mol(mol))
    if not score > cutoff:
        return
    similar.append((mol.write(), score, second))
Beispiel #5
0
    def setUp(self):
        """Create the ten-node pharmacophore fixture stored in self.phar."""
        from decaf import Pharmacophore

        # (frequency, pharmacophoric types) per node; in this fixture every
        # type weight equals the frequency of its node
        specs = [(2.0, ("HH", "AR", "R"))] * 5 + [
            (2.0, ("AR", "R")),
            (2.0, ("HA",)),
            (2.0, ("HH",)),
            (1.0, ("HA", "HD")),
            (1.0, ("HA", "HD")),
        ]
        nodes = []
        for label, (freq, types) in enumerate(specs):
            nodes.append({
                "label": label,
                "freq": freq,
                "type": dict((name, freq) for name in types)
            })

        # undirected edges as (node, node, length)
        links = [(0, 1, 1.), (0, 5, 1.), (1, 2, 1.), (2, 3, 1.), (3, 4, 1.),
                 (4, 5, 1.), (5, 6, 1.), (6, 7, 2.), (7, 8, 1.), (7, 9, 1.),
                 (8, 9, 1.)]
        edges = np.zeros((len(nodes), len(nodes)))
        for first, second, length in links:
            edges[first, second] = length
            edges[second, first] = length

        self.phar = Pharmacophore(nodes, edges, molecules=2, title="test")
Beispiel #6
0
class PharmacophoreTests(unittest.TestCase):
    """Unit tests for creating, modifying, saving and validating a
    Pharmacophore."""

    def setUp(self):
        """Create the ten-node pharmacophore fixture stored in self.phar."""
        from decaf import Pharmacophore
        nodes = [
            {"label": 0, "freq": 2.0,
             "type": {"HH": 2.0, "AR": 2.0, "R": 2.0}},
            {"label": 1, "freq": 2.0,
             "type": {"HH": 2.0, "AR": 2.0, "R": 2.0}},
            {"label": 2, "freq": 2.0,
             "type": {"HH": 2.0, "AR": 2.0, "R": 2.0}},
            {"label": 3, "freq": 2.0,
             "type": {"HH": 2.0, "AR": 2.0, "R": 2.0}},
            {"label": 4, "freq": 2.0,
             "type": {"HH": 2.0, "AR": 2.0, "R": 2.0}},
            {"label": 5, "freq": 2.0, "type": {"AR": 2.0, "R": 2.0}},
            {"label": 6, "freq": 2.0, "type": {"HA": 2.0}},
            {"label": 7, "freq": 2.0, "type": {"HH": 2.0}},
            {"label": 8, "freq": 1.0, "type": {"HA": 1.0, "HD": 1.0}},
            {"label": 9, "freq": 1.0, "type": {"HA": 1.0, "HD": 1.0}},
        ]

        edges = np.array([[0., 1., 0., 0., 0., 1., 0., 0., 0., 0.],
                          [1., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
                          [0., 1., 0., 1., 0., 0., 0., 0., 0., 0.],
                          [0., 0., 1., 0., 1., 0., 0., 0., 0., 0.],
                          [0., 0., 0., 1., 0., 1., 0., 0., 0., 0.],
                          [1., 0., 0., 0., 1., 0., 1., 0., 0., 0.],
                          [0., 0., 0., 0., 0., 1., 0., 2., 0., 0.],
                          [0., 0., 0., 0., 0., 0., 2., 0., 1., 1.],
                          [0., 0., 0., 0., 0., 0., 0., 1., 0., 1.],
                          [0., 0., 0., 0., 0., 0., 0., 1., 1., 0.]])

        self.phar = Pharmacophore(nodes, edges, molecules=2, title="test")

    def tearDown(self):
        """Drop the fixture."""
        self.phar = None

    def testCreate(self):
        """numnodes matches the node list and the edge matrix is symmetric."""
        self.assertEqual(self.phar.numnodes, len(self.phar.nodes))
        for i in range(self.phar.numnodes):
            for j in range(i):
                self.assertEqual(
                    self.phar.edges[i, j],
                    self.phar.edges[j, i],
                    msg=("Array is asymetric! %s!=%s for i=%s, j=%s" %
                         (self.phar.edges[i, j], self.phar.edges[j, i], i, j)))

    def testIter(self):
        """Iterating the pharmacophore yields its nodes in list order."""
        for i, node in enumerate(self.phar):
            self.assertEqual(node, self.phar.nodes[i])

    def testAddNode(self):
        """add_node appends the node and grows the edge matrix by one."""
        node = {"label": "CH3", "freq": 1.0, "type": {"HH": 2.0}}
        num = self.phar.numnodes + 1
        nodes = self.phar.nodes + [node]

        self.phar.add_node(node)

        self.assertEqual(num, self.phar.numnodes)
        self.assertEqual(num, len(self.phar.edges))
        self.assertEqual(num, len(self.phar.edges[0]))
        self.assertEqual(nodes, self.phar.nodes)

    def testDelNode(self):
        """remove_node drops exactly the requested node.

        Every index is exercised in turn (instead of one random index) so the
        test is deterministic; the fixture is rebuilt after each removal.
        """
        for idx in range(self.phar.numnodes):
            num = self.phar.numnodes - 1
            nodes = self.phar.nodes[:idx] + self.phar.nodes[idx + 1:]

            self.phar.remove_node(idx)

            self.assertEqual(num, self.phar.numnodes)
            self.assertEqual(nodes, self.phar.nodes)
            self.setUp()

    def testAddEdge(self):
        """add_edge creates one symmetric edge with the given length."""
        length = 1.0
        # expected number of edges after a single addition to the fixture
        num = np.sum(self.phar.edges > 0) / 2.0 + 1
        for idx1 in range(self.phar.numnodes):
            for idx2 in range(idx1):
                if self.phar.edges[idx1, idx2] == 0:
                    self.phar.add_edge(idx1, idx2, length)

                    self.assertEqual(num, np.sum(self.phar.edges > 0) / 2.0)
                    self.assertEqual(self.phar.edges[idx1, idx2],
                                     self.phar.edges[idx2, idx1])
                    self.assertEqual(self.phar.edges[idx1, idx2], length)
                    # start from the pristine fixture for the next pair
                    self.setUp()

    def testRemoveEdge(self):
        """remove_edge zeroes one edge in both directions."""
        # expected number of edges after a single removal from the fixture
        num = np.sum(self.phar.edges > 0) / 2.0 - 1.0
        for idx1 in range(self.phar.numnodes):
            for idx2 in range(idx1):
                if self.phar.edges[idx1, idx2] > 0:

                    self.phar.remove_edge(idx1, idx2)

                    self.assertEqual(self.phar.edges[idx1, idx2],
                                     self.phar.edges[idx2, idx1])
                    self.assertEqual(self.phar.edges[idx1, idx2], 0.0)
                    self.assertEqual(num, np.sum(self.phar.edges > 0) / 2.0)
                    # start from the pristine fixture for the next pair
                    self.setUp()

    def testSaveRead(self):
        """save() followed by read() reproduces the pharmacophore.

        Also checks that reading a removed file and saving into a missing
        directory both raise IOError.
        """
        from decaf import Pharmacophore
        import os
        import shutil
        import tempfile

        # Work inside a private directory: nothing is left behind when an
        # assertion fails, and "nonexist" is guaranteed not to exist.
        workdir = tempfile.mkdtemp()
        filename = os.path.join(workdir, "test.p")
        try:
            self.phar.save(filename)
            p_copy = Pharmacophore.read(filename)

            self.assertEqual(self.phar.numnodes, p_copy.numnodes)
            self.assertEqual(self.phar.nodes, p_copy.nodes)
            for i in range(p_copy.numnodes):
                for j in range(p_copy.numnodes):
                    self.assertEqual(self.phar.edges[i, j],
                                     p_copy.edges[i, j])
            self.assertEqual(self.phar.title, p_copy.title)
            self.assertEqual(self.phar.molecules, p_copy.molecules)

            os.remove(filename)
            self.assertRaises(IOError, Pharmacophore.read, filename)
            self.assertRaises(IOError, Pharmacophore.save, p_copy,
                              os.path.join(workdir, "nonexist", "test.p"))
        finally:
            shutil.rmtree(workdir, ignore_errors=True)

    def testValidation(self):
        """Invalid constructor and method arguments raise TypeError or
        ValueError."""
        from decaf import Pharmacophore

        self.assertRaises(TypeError, Pharmacophore, "a", self.phar.edges)
        self.assertRaises(TypeError, Pharmacophore, self.phar.nodes, "a")
        self.assertRaises(TypeError,
                          Pharmacophore,
                          self.phar.nodes,
                          self.phar.edges,
                          molecules="a")
        self.assertRaises(ValueError,
                          Pharmacophore,
                          self.phar.nodes,
                          self.phar.edges,
                          molecules=-1)
        self.assertRaises(TypeError,
                          Pharmacophore,
                          self.phar.nodes,
                          self.phar.edges,
                          title=1)

        other_nodes = self.phar.nodes[1:]
        invalid = [
            # first node without "label"
            ([{"freq": 2.0, "type": {"HH": 2.0, "AR": 2.0}}] + other_nodes,
             self.phar.edges),
            # first node without "freq"
            ([{"label": 0, "type": {"HH": 2.0, "AR": 2.0}}] + other_nodes,
             self.phar.edges),
            # first node without "type"
            ([{"label": 0, "freq": 2.0}] + other_nodes, self.phar.edges),
            # first node with the type key "H"
            ([{"label": 0, "freq": 2.0, "type": {"H": 2.0, "AR": 2.0}}] +
             other_nodes, self.phar.edges),
            # 3x3 edge matrix for ten nodes
            (self.phar.nodes, self.phar.edges[:3][:, :3]),
        ]

        for args in invalid:
            self.assertRaises(ValueError, Pharmacophore, *args)

        self.assertRaises(TypeError, self.phar.add_node, "1")
        self.assertRaises(ValueError, self.phar.add_node, {})
        self.assertRaises(TypeError, self.phar.remove_node, "1")
        self.assertRaises(ValueError, self.phar.remove_node, -1)
        self.assertRaises(ValueError, self.phar.remove_node,
                          self.phar.numnodes)
        self.assertRaises(TypeError, self.phar.add_edge, "0", 1, 2)
        self.assertRaises(TypeError, self.phar.add_edge, 0, "1", 2)
        self.assertRaises(TypeError, self.phar.add_edge, 0, 1, "2")
        self.assertRaises(ValueError, self.phar.add_edge, 0, 0, 2)
        self.assertRaises(ValueError, self.phar.add_edge, -1, 0, 2)
        self.assertRaises(ValueError, self.phar.add_edge, 0,
                          self.phar.numnodes, 2)
        self.assertRaises(ValueError, self.phar.remove_edge, -1, 0)
        self.assertRaises(TypeError, self.phar.remove_edge, "0", 1)
        self.assertRaises(TypeError, self.phar.remove_edge, 0, "1")
        self.assertRaises(ValueError, self.phar.remove_edge, 0,
                          self.phar.numnodes)
Beispiel #7
0
def combine_pharmacophores(p1,
                           p2,
                           dist_tol=0.0,
                           freq_cutoff=0.0,
                           add_neighbours=False):
    """Create new model from Pharmacophores p1 and p2

    Find common part of two Pharmacophores, add unique elements and calculate
      new frequencies and distances.

    The warning filters of the process are left untouched: the warning
    suppression needed while building the common part, and the forced
    "Empty common part!" warning, are both scoped with
    warnings.catch_warnings().

    Args:
      p1, p2 (Pharmacophore): models to combine
      dist_tol (float, optional): accept distance differences below this
        threshold
      freq_cutoff (float, optional): skip unique nodes with frequencies below
        this threshold; given as a fraction in [0, 1] of the total number of
        molecules in both models
      add_neighbours (bool, optional): if True, try to extend alignment by
        adding neighbours of already aligned nodes.

    Returns:
       Pharmacophore: combination of p1 and p2

    Raises:
      TypeError: if an argument has the wrong type
      ValueError: if dist_tol is negative or freq_cutoff is outside [0, 1]
    """
    if not isinstance(p1, Pharmacophore):
        raise TypeError("Expected Pharmacophore, got %s instead" %
                        type(p1).__name__)

    if not isinstance(p2, Pharmacophore):
        raise TypeError("Expected Pharmacophore, got %s instead" %
                        type(p2).__name__)

    if not isinstance(dist_tol, (int, float)):
        raise TypeError("dist_tol must be float or int!")

    if dist_tol < 0:
        raise ValueError("dist_tol must be greater than or equal 0")

    if not isinstance(freq_cutoff, (int, float)):
        raise TypeError("freq_cutoff must be float or int!")

    if freq_cutoff < 0 or freq_cutoff > 1:
        raise ValueError("Invalid freq_cutoff! Use value in the range [0,1]")

    if not isinstance(add_neighbours, bool):
        raise TypeError("add_neighbours must be bool!")

    # find common pharmacophore
    # mapped_nodes[0][i] (node of p1) is aligned with mapped_nodes[1][i]
    # (node of p2)
    _, _, mapped_nodes = map_pharmacophores(p1,
                                            p2,
                                            dist_tol,
                                            coarse_grained=False,
                                            add_neighbours=add_neighbours)
    # pairwise distances from distances(); wherever a direct edge exists its
    # length overrides the computed value
    dist1 = distances(p1)
    dist1[p1.edges > 0] = p1.edges[p1.edges > 0]
    dist2 = distances(p2)
    dist2[p2.edges > 0] = p2.edges[p2.edges > 0]

    # we will need it later
    # added[0] / added[1]: node id in the new model -> node id in p1 / p2
    added = {0: {}, 1: {}}

    # create new graph from common part
    molecules = p1.molecules + p2.molecules

    title = "(" + p1.title + ")+(" + p2.title + ")"
    nodes = []

    idx = 0
    for i in range(len(mapped_nodes[0])):
        u = p1.nodes[mapped_nodes[0][i]]
        v = p2.nodes[mapped_nodes[1][i]]
        _, types = compare_nodes(u, v)
        nodes.append({
            "label": idx,
            "type": types,
            "freq": u["freq"] + v["freq"]
        })
        for j in [0, 1]:
            added[j][idx] = mapped_nodes[j][i]
        idx += 1

    # add edges
    # a pair of common nodes is connected when either model has an edge
    # between the corresponding nodes; the length is the mean of both
    # distances weighted by the summed node frequencies
    edges = np.zeros((idx, idx))
    for i in range(idx):
        no1 = (added[0][i], added[1][i])
        for j in range(i):
            dist = 0.0
            no2 = (added[0][j], added[1][j])
            freq1 = p1.nodes[no1[0]]["freq"] + p1.nodes[no2[0]]["freq"]
            freq2 = p2.nodes[no1[1]]["freq"] + p2.nodes[no2[1]]["freq"]
            if p1.edges[no1[0], no2[0]] or p2.edges[no1[1], no2[1]]:
                d1 = dist1[no1[0], no2[0]]
                d2 = dist2[no1[1], no2[1]]
                dist = (d1 * freq1 + d2 * freq2) / (freq1 + freq2)
                edges[i, j] = edges[j, i] = dist

    # do not warn about empty pharmacophore (nodes might be added latter)
    # warn about empty common part instead
    # catch_warnings() restores the caller's filters on exit instead of
    # overwriting them for the rest of the process
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", UserWarning)
        new_p = Pharmacophore(nodes=nodes,
                              edges=edges,
                              molecules=molecules,
                              title=title)
    if new_p.numnodes == 0:
        # keep reporting this on every call, without leaving the "always"
        # filter installed afterwards
        with warnings.catch_warnings():
            warnings.simplefilter("always", UserWarning)
            warnings.warn("Empty common part!")

    # add unique elements
    # turn the fractional cutoff into an absolute frequency
    freq_cutoff = molecules * freq_cutoff

    to_add = [
        [
            i for i in range(p1.numnodes)
            if i not in mapped_nodes[0] and p1.nodes[i]["freq"] >= freq_cutoff
        ],
        [
            i for i in range(p2.numnodes)
            if i not in mapped_nodes[1] and p2.nodes[i]["freq"] >= freq_cutoff
        ]
    ]

    for (nr, phar) in {0: p1, 1: p2}.items():
        for n in to_add[nr]:
            added[nr][idx] = n
            new_p.add_node(phar.nodes[n].copy())
            new_p.nodes[idx]["label"] = idx
            # copy the edges linking this node to nodes of the same source
            # model that are already part of the new model
            for (k, v) in added[nr].items():
                if phar.edges[n, v] > 0:
                    new_p.add_edge(k, idx, phar.edges[n, v])
            idx += 1

    # check if new pharmacophore is connected
    components = split_components(new_p)
    comp_nr = len(components)

    if comp_nr > 1:
        # shortest distances between components
        comp_dist = np.zeros((comp_nr, comp_nr)) + float("inf")

        # nearest_node[i, j] == id of node from component j, that is nearest to
        # component i
        nearest_node = np.zeros((comp_nr, comp_nr), dtype=int)
        for i in range(comp_nr):
            for j in range(i):
                shortest_dist = float("inf")
                nearest_nodes = [None, None]
                for n1 in components[i]:
                    for n2 in components[j]:
                        # a source model contributes only when both nodes
                        # come from it
                        if n1 in added[0] and n2 in added[0]:
                            d1 = dist1[added[0][n1], added[0][n2]]
                            freq1 = p1.nodes[added[0][n1]]["freq"] + \
                                    p1.nodes[added[0][n2]]["freq"]
                        else:
                            d1 = 0
                            freq1 = 0
                        if n1 in added[1] and n2 in added[1]:
                            d2 = dist2[added[1][n1], added[1][n2]]
                            freq2 = p2.nodes[added[1][n1]]["freq"] + \
                                    p2.nodes[added[1][n2]]["freq"]
                        else:
                            d2 = 0
                            freq2 = 0
                        if (freq1 + freq2) == 0:
                            # no model contains both nodes
                            dist = float("inf")
                        else:
                            dist = (d1 * freq1 + d2 * freq2) / (freq1 + freq2)

                        if dist < shortest_dist:
                            shortest_dist = dist
                            nearest_nodes = [n1, n2]
                comp_dist[i, j] = comp_dist[j, i] = shortest_dist
                if shortest_dist < float("inf"):
                    nearest_node[i, j] = nearest_nodes[1]
                    nearest_node[j, i] = nearest_nodes[0]

        # component pairs ordered by increasing distance
        sorted_connections = np.unravel_index(comp_dist.argsort(axis=None),
                                              comp_dist.shape)

        # connect components
        for i, j in zip(*sorted_connections):
            n1 = int(nearest_node[i, j])
            n2 = int(nearest_node[j, i])
            new_p.add_edge(n1, n2, comp_dist[i, j])

            # check if graph is already connected
            if len(split_components(new_p)) == 1:
                break

    return new_p
Beispiel #8
0
 def setUp(self):
     """Read one Pharmacophore per path in self.files into self.phars."""
     from decaf import Pharmacophore
     self.phars = list(map(Pharmacophore.read, self.files))