Ejemplo n.º 1
0
  def test_generate_fraggle_fragmentation(self):
    # Checks FraggleSim.generate_fraggle_fragmentation on two molecules.
    # Both the generated and the expected SMILES are passed through the
    # file-local `_of` helper before being compared.
    mol = Chem.MolFromSmiles('COc1cc(CN2CCC(CC2)NC(=O)c2cncc(C)c2)c(OC)c2ccccc12')

    frags = FraggleSim.generate_fraggle_fragmentation(mol)
    self.assertEqual(len(frags), 16)
    expected = (
      '[*]C(=O)NC1CCN(Cc2cc(OC)c3ccccc3c2OC)CC1',
      '[*]C(=O)c1cncc(C)c1.[*]C1CCN(Cc2cc(OC)c3ccccc3c2OC)CC1',
      '[*]C(=O)c1cncc(C)c1.[*]c1cc(OC)c2ccccc2c1OC',
      '[*]C1CCN(Cc2cc(OC)c3ccccc3c2OC)CC1',
      '[*]C(=O)c1cncc(C)c1.[*]Cc1cc(OC)c2ccccc2c1OC',
      '[*]Cc1cc(OC)c2ccccc2c1OC.[*]NC(=O)c1cncc(C)c1',
      '[*]Cc1cc(OC)c2ccccc2c1OC.[*]c1cncc(C)c1',
      '[*]NC(=O)c1cncc(C)c1.[*]c1cc(OC)c2ccccc2c1OC',
      '[*]NC1CCN(Cc2cc(OC)c3ccccc3c2OC)CC1',
      '[*]NC1CCN(Cc2cc(OC)c3ccccc3c2OC)CC1.[*]c1cncc(C)c1',
      '[*]c1c(CN2CCC(NC(=O)c3cncc(C)c3)CC2)cc(OC)c2ccccc12',
      '[*]c1c(OC)cc(CN2CCC(NC(=O)c3cncc(C)c3)CC2)c(OC)c1[*]',
      '[*]c1cc(CN2CCC(NC(=O)c3cncc(C)c3)CC2)c(OC)c2ccccc12',
      '[*]N1CCC(NC(=O)c2cncc(C)c2)CC1.[*]c1cc(OC)c2ccccc2c1OC',
      '[*]C1CCN(Cc2cc(OC)c3ccccc3c2OC)CC1.[*]c1cncc(C)c1',
      '[*]c1cc(OC)c2ccccc2c1OC.[*]c1cncc(C)c1',
    )
    expected = [_of(s) for s in expected]
    for smi in frags:
      # assertIn names the offending fragment on failure, unlike
      # assertTrue(x in y) which only reports "False is not true".
      self.assertIn(_of(smi), expected)

    # Test case for fragments that contain a cyclic and acyclic component
    mol = Chem.MolFromSmiles('c12c(CCC)cccc2cccc1')
    frags = FraggleSim.generate_fraggle_fragmentation(mol)
    expected = [
      '[*]CCC.[*]c1ccccc1[*]',
      '[*]Cc1cccc2ccccc12',
      '[*]c1cccc(CCC)c1[*]',
      '[*]c1cccc2ccccc12',
      '[*]c1ccccc1[*]',
    ]
    expected = [_of(s) for s in expected]
    for smi in frags:
      self.assertIn(_of(smi), expected)
Ejemplo n.º 2
0
    def test_GetFraggleSimilarity(self):
        # Table-driven check of FraggleSim.GetFraggleSimilarity: each row is
        # (query SMILES, molecule SMILES, expected similarity, expected match).
        # Similarities are compared to two decimal places.
        full_query = 'COc1cc(CN2CCC(NC(=O)c3cncc(C)c3)CC2)c(OC)c2ccccc12'
        small_query = 'COc1ccccc1'
        phenyl_amide = 'COc1cc(CN2CCC(NC(=O)c3ccccc3)CC2)c(OC)c2ccccc12'
        benzothiazole = 'COc1cc(CN2CCC(Nc3nc4ccccc4s3)CC2)c(OC)c2ccccc12'

        cases = (
            (full_query, phenyl_amide, 0.980,
             '[*]C1CCN(Cc2cc(OC)c3ccccc3c2OC)CC1'),
            (full_query, benzothiazole, 0.794,
             '[*]C1CCN(Cc2cc(OC)c3ccccc3c2OC)CC1'),
            (small_query, benzothiazole, 0.347, '[*]c1ccccc1'),
            (small_query, phenyl_amide, 0.266, '[*]c1ccccc1'),
        )

        for query_smi, mol_smi, want_sim, want_match in cases:
            got_sim, got_match = FraggleSim.GetFraggleSimilarity(
                Chem.MolFromSmiles(query_smi), Chem.MolFromSmiles(mol_smi))
            self.assertAlmostEqual(got_sim, want_sim, places=2)
            self.assertEqual(got_match, want_match)
Ejemplo n.º 3
0
  def test_generate_fraggle_fragmentation(self):
    # Checks FraggleSim.generate_fraggle_fragmentation on two molecules.
    # Both the generated and the expected SMILES are passed through the
    # file-local `_of` helper before being compared.
    mol = Chem.MolFromSmiles('COc1cc(CN2CCC(CC2)NC(=O)c2cncc(C)c2)c(OC)c2ccccc12')

    frags = FraggleSim.generate_fraggle_fragmentation(mol)
    self.assertEqual(len(frags), 16)
    expected = (
      '[*]C(=O)NC1CCN(Cc2cc(OC)c3ccccc3c2OC)CC1',
      '[*]C(=O)c1cncc(C)c1.[*]C1CCN(Cc2cc(OC)c3ccccc3c2OC)CC1',
      '[*]C(=O)c1cncc(C)c1.[*]c1cc(OC)c2ccccc2c1OC',
      '[*]C1CCN(Cc2cc(OC)c3ccccc3c2OC)CC1',
      '[*]C(=O)c1cncc(C)c1.[*]Cc1cc(OC)c2ccccc2c1OC',
      '[*]Cc1cc(OC)c2ccccc2c1OC.[*]NC(=O)c1cncc(C)c1',
      '[*]Cc1cc(OC)c2ccccc2c1OC.[*]c1cncc(C)c1',
      '[*]NC(=O)c1cncc(C)c1.[*]c1cc(OC)c2ccccc2c1OC',
      '[*]NC1CCN(Cc2cc(OC)c3ccccc3c2OC)CC1',
      '[*]NC1CCN(Cc2cc(OC)c3ccccc3c2OC)CC1.[*]c1cncc(C)c1',
      '[*]c1c(CN2CCC(NC(=O)c3cncc(C)c3)CC2)cc(OC)c2ccccc12',
      '[*]c1c(OC)cc(CN2CCC(NC(=O)c3cncc(C)c3)CC2)c(OC)c1[*]',
      '[*]c1cc(CN2CCC(NC(=O)c3cncc(C)c3)CC2)c(OC)c2ccccc12',
      '[*]N1CCC(NC(=O)c2cncc(C)c2)CC1.[*]c1cc(OC)c2ccccc2c1OC',
      '[*]C1CCN(Cc2cc(OC)c3ccccc3c2OC)CC1.[*]c1cncc(C)c1',
      '[*]c1cc(OC)c2ccccc2c1OC.[*]c1cncc(C)c1',
    )
    expected = [_of(s) for s in expected]
    for smi in frags:
      # assertIn names the offending fragment on failure, unlike
      # assertTrue(x in y) which only reports "False is not true".
      self.assertIn(_of(smi), expected)

    # Test case for fragments that contain a cyclic and acyclic component
    mol = Chem.MolFromSmiles('c12c(CCC)cccc2cccc1')
    frags = FraggleSim.generate_fraggle_fragmentation(mol)
    expected = [
      '[*]CCC.[*]c1ccccc1[*]',
      '[*]Cc1cccc2ccccc12',
      '[*]c1cccc(CCC)c1[*]',
      '[*]c1cccc2ccccc12',
      '[*]c1ccccc1[*]',
    ]
    expected = [_of(s) for s in expected]
    for smi in frags:
      self.assertIn(_of(smi), expected)
Ejemplo n.º 4
0
 def test_isValidRingCut(self):
   # Checks FraggleSim.isValidRingCut on two-attachment-point fragments:
   # an acyclic chain, two rings, and an unsanitized aromatic chain.
   #
   # The 'rdApp.error' log is switched off while the checks run. The
   # try/finally guarantees it is switched back on even when an assertion
   # fails, so a failure here cannot mute error output for later tests.
   rdBase.DisableLog('rdApp.error')
   try:
     self.assertEqual(FraggleSim.isValidRingCut(Chem.MolFromSmiles('[*]CCC[*]')), False)
     self.assertEqual(FraggleSim.isValidRingCut(Chem.MolFromSmiles('[*]C1CC1[*]')), True)
     self.assertEqual(FraggleSim.isValidRingCut(Chem.MolFromSmiles('[*]c1ccccc1[*]')), True)
     self.assertEqual(
       FraggleSim.isValidRingCut(Chem.MolFromSmiles('[*]cccc[*]', sanitize=False)), False)
   finally:
     rdBase.EnableLog('rdApp.error')
Ejemplo n.º 5
0
 def test_isValidRingCut(self):
   # Checks FraggleSim.isValidRingCut on two-attachment-point fragments:
   # an acyclic chain, two rings, and an unsanitized aromatic chain.
   #
   # The 'rdApp.error' log is switched off while the checks run. The
   # try/finally guarantees it is switched back on even when an assertion
   # fails, so a failure here cannot mute error output for later tests.
   rdBase.DisableLog('rdApp.error')
   try:
     self.assertEqual(FraggleSim.isValidRingCut(Chem.MolFromSmiles('[*]CCC[*]')), False)
     self.assertEqual(FraggleSim.isValidRingCut(Chem.MolFromSmiles('[*]C1CC1[*]')), True)
     self.assertEqual(FraggleSim.isValidRingCut(Chem.MolFromSmiles('[*]c1ccccc1[*]')), True)
     self.assertEqual(
       FraggleSim.isValidRingCut(Chem.MolFromSmiles('[*]cccc[*]', sanitize=False)), False)
   finally:
     rdBase.EnableLog('rdApp.error')
Ejemplo n.º 6
0
    def testFragmentation(self):
        """Check the Fraggle fragmentation of a reference molecule.

        The molecule must yield 16 fragmentations, and every generated
        SMILES must be one of the expected strings.
        """
        mol = Chem.MolFromSmiles(
            'COc1cc(CN2CCC(CC2)NC(=O)c2cncc(C)c2)c(OC)c2ccccc12')
        frags = FraggleSim.generate_fraggle_fragmentation(mol)
        self.assertEqual(len(frags), 16)

        expected = ('[*]C(=O)NC1CCN(Cc2cc(OC)c3ccccc3c2OC)CC1',
                    '[*]C(=O)c1cncc(C)c1.[*]C1CCN(Cc2cc(OC)c3ccccc3c2OC)CC1',
                    '[*]C(=O)c1cncc(C)c1.[*]c1cc(OC)c2ccccc2c1OC',
                    '[*]C1CCN(Cc2cc(OC)c3ccccc3c2OC)CC1',
                    '[*]C(=O)c1cncc(C)c1.[*]Cc1cc(OC)c2ccccc2c1OC',
                    '[*]Cc1cc(OC)c2ccccc2c1OC.[*]NC(=O)c1cncc(C)c1',
                    '[*]Cc1cc(OC)c2ccccc2c1OC.[*]c1cncc(C)c1',
                    '[*]NC(=O)c1cncc(C)c1.[*]c1cc(OC)c2ccccc2c1OC',
                    '[*]NC1CCN(Cc2cc(OC)c3ccccc3c2OC)CC1',
                    '[*]NC1CCN(Cc2cc(OC)c3ccccc3c2OC)CC1.[*]c1cncc(C)c1',
                    '[*]c1c(CN2CCC(NC(=O)c3cncc(C)c3)CC2)cc(OC)c2ccccc12',
                    '[*]c1c(OC)cc(CN2CCC(NC(=O)c3cncc(C)c3)CC2)c(OC)c1[*]',
                    '[*]c1cc(CN2CCC(NC(=O)c3cncc(C)c3)CC2)c(OC)c2ccccc12',
                    '[*]N1CCC(NC(=O)c2cncc(C)c2)CC1.[*]c1cc(OC)c2ccccc2c1OC',
                    '[*]C1CCN(Cc2cc(OC)c3ccccc3c2OC)CC1.[*]c1cncc(C)c1',
                    '[*]c1cc(OC)c2ccccc2c1OC.[*]c1cncc(C)c1')
        for smi in frags:
            # assertIn names the unexpected fragment in the failure message.
            self.assertIn(smi, expected)
Ejemplo n.º 7
0
  def testFragmentation(self):
    """Check the Fraggle fragmentation of a reference molecule.

    The molecule must yield 16 fragmentations, and every generated
    SMILES must be one of the expected strings.
    """
    mol = Chem.MolFromSmiles('COc1cc(CN2CCC(CC2)NC(=O)c2cncc(C)c2)c(OC)c2ccccc12')
    frags = FraggleSim.generate_fraggle_fragmentation(mol)
    # failUnlessEqual / failUnless were deprecated aliases and no longer exist
    # in Python 3.12+; assertEqual / assertIn are the supported spellings.
    self.assertEqual(len(frags), 16)

    expected = (
      '[*]C(=O)NC1CCN(Cc2cc(OC)c3ccccc3c2OC)CC1',
      '[*]C(=O)c1cncc(C)c1.[*]C1CCN(Cc2cc(OC)c3ccccc3c2OC)CC1',
      '[*]C(=O)c1cncc(C)c1.[*]c1cc(OC)c2ccccc2c1OC',
      '[*]C1CCN(Cc2cc(OC)c3ccccc3c2OC)CC1',
      '[*]Cc1cc(OC)c2ccccc2c1OC.[*]C(=O)c1cncc(C)c1',
      '[*]Cc1cc(OC)c2ccccc2c1OC.[*]NC(=O)c1cncc(C)c1',
      '[*]Cc1cc(OC)c2ccccc2c1OC.[*]c1cncc(C)c1',
      '[*]NC(=O)c1cncc(C)c1.[*]c1cc(OC)c2ccccc2c1OC',
      '[*]NC1CCN(Cc2cc(OC)c3ccccc3c2OC)CC1',
      '[*]NC1CCN(Cc2cc(OC)c3ccccc3c2OC)CC1.[*]c1cncc(C)c1',
      '[*]c1c(CN2CCC(NC(=O)c3cncc(C)c3)CC2)cc(OC)c2ccccc12',
      '[*]c1c([*])c(OC)c(CN2CCC(NC(=O)c3cncc(C)c3)CC2)cc1OC',
      '[*]c1cc(CN2CCC(NC(=O)c3cncc(C)c3)CC2)c(OC)c2ccccc12',
      '[*]c1cc(OC)c2ccccc2c1OC.[*]N1CCC(NC(=O)c2cncc(C)c2)CC1',
      '[*]c1cncc(C)c1.[*]C1CCN(Cc2cc(OC)c3ccccc3c2OC)CC1',
      '[*]c1cncc(C)c1.[*]c1cc(OC)c2ccccc2c1OC',
    )
    for smi in frags:
      self.assertIn(smi, expected)
Ejemplo n.º 8
0
def frag(
    mol: Chem.Mol,
    remove_parent: bool = False,
    sanitize: bool = True,
    fix: bool = True,
):
    """Enumerate the fragments of a molecule via Fraggle fragmentation.

    Args:
        mol: a molecule.
        remove_parent: if True, return only the fragments; otherwise the
            input molecule is returned as the first element of the list.
        sanitize: whether to pass each fragment through `dm.sanitize_mol`.
        fix: whether to pass each fragment through `dm.fix_mol`.

    Returns:
        A list of molecules. Fragments that end up as None after the
        optional fix/sanitize steps are dropped.
    """
    # Each fragmentation is a dot-separated SMILES string; split them all
    # and keep each distinct piece once.
    unique_pieces = {
        piece.strip()
        for fragmentation in FraggleSim.generate_fraggle_fragmentation(mol)
        for piece in fragmentation.split(".")
    }

    # Reverse-sorted SMILES give a deterministic fragment order.
    fragments = [dm.to_mol(smi) for smi in sorted(unique_pieces, reverse=True)]

    if fix:
        fragments = list(map(dm.fix_mol, fragments))
    if sanitize:
        fragments = list(map(dm.sanitize_mol, fragments))

    kept = [m for m in fragments if m is not None]
    return kept if remove_parent else [mol, *kept]
Ejemplo n.º 9
0
def generate_sim(base: str, smile: str):
    """Compute the Fraggle similarity between two molecules given as SMILES.

    Args:
        base: SMILES of the molecule passed as the second argument of
            ``FraggleSim.GetFraggleSimilarity``.
        smile: SMILES of the molecule passed as the first argument.

    Returns:
        The ``(sim, match)`` pair produced by
        ``FraggleSim.GetFraggleSimilarity``, or ``(0.0, None)`` when that
        call (or unpacking its result) raises.
    """
    # Use separate names for the parsed molecules rather than rebinding the
    # string parameters to a different type.
    base_mol = Chem.MolFromSmiles(base)
    query_mol = Chem.MolFromSmiles(smile)
    try:
        sim, match = FraggleSim.GetFraggleSimilarity(query_mol, base_mol)
    except Exception:
        # Best-effort: any failure in the similarity computation is reported
        # as "no similarity". `except Exception` (instead of a bare except)
        # lets KeyboardInterrupt and SystemExit propagate.
        return 0.0, None

    return sim, match
Ejemplo n.º 10
0
  def testFragmentation2(self):
    # The molecule below must produce 13 Fraggle fragmentations, each of
    # which has to be found among the expected SMILES once both sides have
    # been passed through the file-local `_of` helper.
    parent = Chem.MolFromSmiles('COc1cc(CN2CCC(NC(=O)c3ccccc3)CC2)c(OC)c2ccccc12')

    fragmentations = FraggleSim.generate_fraggle_fragmentation(parent)
    self.assertEqual(len(fragmentations), 13)

    expected_smiles = (
      '[*]C(=O)c1ccccc1.[*]C1CCN(Cc2cc(OC)c3ccccc3c2OC)CC1',
      '[*]C(=O)c1ccccc1.[*]Cc1cc(OC)c2ccccc2c1OC',
      '[*]C(=O)c1ccccc1.[*]c1cc(OC)c2ccccc2c1OC',
      '[*]C1CCN(Cc2cc(OC)c3ccccc3c2OC)CC1.[*]c1ccccc1',
      '[*]Cc1cc(OC)c2ccccc2c1OC.[*]NC(=O)c1ccccc1',
      '[*]Cc1cc(OC)c2ccccc2c1OC.[*]c1ccccc1',
      '[*]N1CCC(NC(=O)c2ccccc2)CC1.[*]c1cc(OC)c2ccccc2c1OC',
      '[*]NC(=O)c1ccccc1.[*]c1cc(OC)c2ccccc2c1OC',
      '[*]NC1CCN(Cc2cc(OC)c3ccccc3c2OC)CC1.[*]c1ccccc1',
      '[*]c1c(CN2CCC(NC(=O)c3ccccc3)CC2)cc(OC)c2ccccc12',
      '[*]c1c(OC)cc(CN2CCC(NC(=O)c3ccccc3)CC2)c(OC)c1[*]',
      '[*]c1cc(CN2CCC(NC(=O)c3ccccc3)CC2)c(OC)c2ccccc12',
      '[*]c1cc(OC)c2ccccc2c1OC.[*]c1ccccc1',
    )
    expected_keys = list(map(_of, expected_smiles))
    for fragment_smiles in fragmentations:
      self.assertIn(_of(fragment_smiles), expected_keys)
Ejemplo n.º 11
0
  def testFragmentation2(self):
    # The molecule below must produce 13 Fraggle fragmentations, each of
    # which has to be found among the expected SMILES once both sides have
    # been passed through the file-local `_of` helper.
    parent = Chem.MolFromSmiles('COc1cc(CN2CCC(NC(=O)c3ccccc3)CC2)c(OC)c2ccccc12')

    fragmentations = FraggleSim.generate_fraggle_fragmentation(parent)
    self.assertEqual(len(fragmentations), 13)

    expected_smiles = (
      '[*]C(=O)c1ccccc1.[*]C1CCN(Cc2cc(OC)c3ccccc3c2OC)CC1',
      '[*]C(=O)c1ccccc1.[*]Cc1cc(OC)c2ccccc2c1OC',
      '[*]C(=O)c1ccccc1.[*]c1cc(OC)c2ccccc2c1OC',
      '[*]C1CCN(Cc2cc(OC)c3ccccc3c2OC)CC1.[*]c1ccccc1',
      '[*]Cc1cc(OC)c2ccccc2c1OC.[*]NC(=O)c1ccccc1',
      '[*]Cc1cc(OC)c2ccccc2c1OC.[*]c1ccccc1',
      '[*]N1CCC(NC(=O)c2ccccc2)CC1.[*]c1cc(OC)c2ccccc2c1OC',
      '[*]NC(=O)c1ccccc1.[*]c1cc(OC)c2ccccc2c1OC',
      '[*]NC1CCN(Cc2cc(OC)c3ccccc3c2OC)CC1.[*]c1ccccc1',
      '[*]c1c(CN2CCC(NC(=O)c3ccccc3)CC2)cc(OC)c2ccccc12',
      '[*]c1c(OC)cc(CN2CCC(NC(=O)c3ccccc3)CC2)c(OC)c1[*]',
      '[*]c1cc(CN2CCC(NC(=O)c3ccccc3)CC2)c(OC)c2ccccc12',
      '[*]c1cc(OC)c2ccccc2c1OC.[*]c1ccccc1',
    )
    expected_keys = list(map(_of, expected_smiles))
    for fragment_smiles in fragmentations:
      self.assertIn(_of(fragment_smiles), expected_keys)
Ejemplo n.º 12
0
if __name__ == '__main__':
  # Command-line entry point: reads "SMILES ID" records (space or comma
  # separated) from STDIN and prints one "smiles,id,fragment" line per
  # Fraggle fragmentation of each molecule.
  import re
  import sys

  # Input only ever comes from STDIN, so any command-line argument is
  # answered with the usage text and a non-zero exit status.
  if (len(sys.argv) >= 2):
    print(
      "Program to run the first part of Fraggle. Program splits the molecule\nready for the search\n")
    print("USAGE: ./fraggle.py <file_of_smiles")
    print("Format of smiles file: SMILES ID (space or comma separated)")
    print("Output: whole mol smiles,ID,fraggle split smiles\n")
    sys.exit(1)

  # Raw string: in a plain string literal "\s" is an invalid escape sequence.
  # Compiled once, outside the loop.
  field_sep = re.compile(r'\s|,')

  #read the STDIN
  for line in sys.stdin:
    line = line.rstrip()
    if not line:
      # Blank lines (e.g. a trailing newline) carry no record.
      continue

    fields = field_sep.split(line)
    if len(fields) != 2:
      # Report and skip malformed records instead of dying on an unpacking
      # ValueError, mirroring how unparsable SMILES are handled below.
      sys.stderr.write("Can't parse line: %s\n" % (line))
      continue
    smi, id_ = fields

    mol = Chem.MolFromSmiles(smi)

    if mol is None:
      sys.stderr.write("Can't generate mol for: %s\n" % (smi))
      continue

    out_fragments = FraggleSim.generate_fraggle_fragmentation(mol)
    #print out the unique fragments
    for x in out_fragments:
      # Round-trip through a Mol to emit the canonical SMILES.
      temp = Chem.MolFromSmiles(x)

      print("%s,%s,%s" % (smi, id_, Chem.MolToSmiles(temp)))
Ejemplo n.º 13
0
    # test_mol_neu.SetProp("NScode", mol.GetProp("NScode"))
    reference_desalt.append(test_mol_neu)
    count = count+1

# Report how many molecules were collected in reference_desalt.
print("Reference Molecule Num: ", len(reference_desalt))

# Similarity evaluation with Morgan fingerprints: the first query molecule is
# compared against every reference molecule using Tanimoto similarity.
morgan_reference = [AllChem.GetMorganFingerprintAsBitVect(mol, 2, 2048) for mol in reference_desalt] # (mol, radius from the central atom, number of bits)
morgan_query = [AllChem.GetMorganFingerprintAsBitVect(mol, 2, 2048) for mol in query]
tanimoto = DataStructs.BulkTanimotoSimilarity(morgan_query[0], morgan_reference)


# Similarity evaluation with Fraggle: GetFraggleSimilarity returns a
# (similarity, match) pair for each reference molecule; the two parts are
# collected into parallel lists.
fraggle_sim = []
fraggle_match = []
for (sim, match) in [FraggleSim.GetFraggleSimilarity(query[0], reference_desalt[i]) for i in range(len(reference_desalt))] :
    fraggle_sim.append(sim)
    fraggle_match.append(match)


# Write the results to a PDF file
# img = Draw.MolsToGridImage(namiki[:16], molsPerRow=4, subImgSize=(300,200), legends=['Fraggle: {:.2f}'.format(i) for i in fraggle_similarity[:16]])
# img.save('./fraggle.pdf')

# Write the results to an SDF file; the query molecule is written first,
# with both similarity properties set to "1".
sdf_filename = "./fraggle-result.sdf"
writer = Chem.SDWriter(sdf_filename)
query[0].SetProp("NScode", "2-17") # attach a property to the mol object
query[0].SetProp("Tanimoto_Sim", "1")
query[0].SetProp("Fraggle_Sim", "1")
writer.write(query[0])
Ejemplo n.º 14
0
    if (iMol == None):
      sys.stderr.write("Can't generate mol for: %s\n" % (inSmi))
      continue

    #discard based on atom size
    if (iMol.GetNumAtoms() < query_size[qID] - 3):
      #sys.stderr.write("Too small: %s\n" % (inSmi) )
      continue

    if (iMol.GetNumAtoms() > query_size[qID] + 4):
      #sys.stderr.write("Too large: %s\n" % (inSmi) )
      continue

    #print '>>>',id_
    rdkit_sim, fraggle_sim = FraggleSim.compute_fraggle_similarity_for_subs(
      iMol, query_mols[qID], qSmi, qSubs, options.pfp)
    day_sim[qID][id_] = rdkit_sim
    frag_sim[qID][id_] = max(frag_sim[qID][id_], fraggle_sim)

    #check if you have the fp for the modified query
    #and generate if need to

  #right, print out the results for the query
  #Format: SMILES,ID,QuerySMI,QueryID,Fraggle_Similarity,Daylight_Similarity
  for qID in frag_sim:
    for id_ in frag_sim[qID]:
      if (frag_sim[qID][id_] >= fraggle_cutoff):
        print("%s,%s,%s,%s,%s,%s" %
              (id_to_smi[id_], id_, id_to_smi[qID], qID, frag_sim[qID][id_], day_sim[qID][id_]))
Ejemplo n.º 15
0
        if(iMol == None):
            sys.stderr.write("Can't generate mol for: %s\n" % (inSmi) )
            continue

        #discard based on atom size
        if(iMol.GetNumAtoms() < query_size[qID]-3):
            #sys.stderr.write("Too small: %s\n" % (inSmi) )
            continue;

        if(iMol.GetNumAtoms() > query_size[qID]+4):
            #sys.stderr.write("Too large: %s\n" % (inSmi) )
            continue;
        
        #print '>>>',id_
        rdkit_sim,fraggle_sim = FraggleSim.compute_fraggle_similarity_for_subs(iMol,query_mols[qID],qSmi,qSubs,
                                                                               options.pfp)
        day_sim[qID][id_] = rdkit_sim
        frag_sim[qID][id_] = max(frag_sim[qID][id_],fraggle_sim)

        #check if you have the fp for the modified query
        #and generate if need to



    #right, print out the results for the query
    #Format: SMILES,ID,QuerySMI,QueryID,Fraggle_Similarity,Daylight_Similarity
    for qID in frag_sim:
        for id_ in frag_sim[qID]:
            if(frag_sim[qID][id_] >= fraggle_cutoff):
                print("%s,%s,%s,%s,%s,%s" % (id_to_smi[id_],id_,id_to_smi[qID],qID,frag_sim[qID][id_],day_sim[qID][id_]))
Ejemplo n.º 16
0
if __name__ == '__main__':
    # Command-line entry point: reads "SMILES ID" records (space or comma
    # separated) from STDIN and prints one "smiles,id,fragment" line per
    # Fraggle fragmentation of each molecule.
    import re
    import sys

    # Input only ever comes from STDIN, so any command-line argument is
    # answered with the usage text and a non-zero exit status.
    #
    # Every print below is the single-argument call form, which prints the
    # same text on Python 2 and Python 3; the former print statements were
    # a syntax error on Python 3.
    if (len(sys.argv) >= 2):
        print("Program to run the first part of Fraggle. Program splits the molecule\nready for the search\n")
        print("USAGE: ./fraggle.py <file_of_smiles")
        print("Format of smiles file: SMILES ID (space or comma separated)")
        print("Output: whole mol smiles,ID,fraggle split smiles\n")
        sys.exit(1)

    # Raw string: in a plain string literal "\s" is an invalid escape
    # sequence. Compiled once, outside the loop.
    field_sep = re.compile(r'\s|,')

    #read the STDIN
    for line in sys.stdin:
        line = line.rstrip()
        if not line:
            # Blank lines (e.g. a trailing newline) carry no record.
            continue

        fields = field_sep.split(line)
        if len(fields) != 2:
            # Report and skip malformed records instead of dying on an
            # unpacking ValueError, mirroring how unparsable SMILES are
            # handled below.
            sys.stderr.write("Can't parse line: %s\n" % (line))
            continue
        smi, id_ = fields

        mol = Chem.MolFromSmiles(smi)

        if mol is None:
            sys.stderr.write("Can't generate mol for: %s\n" % (smi))
            continue

        out_fragments = FraggleSim.generate_fraggle_fragmentation(mol)
        #print out the unique fragments
        for x in out_fragments:
            # Round-trip through a Mol to emit the canonical SMILES.
            temp = Chem.MolFromSmiles(x)

            print("%s,%s,%s" % (smi, id_, Chem.MolToSmiles(temp)))