def test_fold(self):
    """Check that ``Fold`` shrinks a fingerprint and merges the folded words.

    Two cases are exercised with the "FP2" fingerprinter:

    * a 2-word vector folded to 32 bits is expected to become a single
      word equal to the bitwise OR of both input words;
    * a 4-word vector folded to 64 bits is expected to become two words,
      ``0x01 | 0x20`` and ``0x04 | 0x00``.
    """
    words = ob.vectorUnsignedInt([0x2A, 0x41])
    self.assertEqual(len(words), 2)

    fingerprinter = ob.OBFingerprint.FindFingerprint("FP2")

    # Fold modifies the vector in place; 2 words -> 1 word.
    fingerprinter.Fold(words, 32)
    self.assertEqual(len(words), 1)
    self.assertEqual(words[0], (0x2A | 0x41))

    # 4 words -> 2 words.
    words = ob.vectorUnsignedInt([0x01, 0x04, 0x20, 0x00])
    self.assertEqual(len(words), 4)
    fingerprinter.Fold(words, 64)
    self.assertEqual(len(words), 2)
    self.assertEqual(words[0], 0x21)
    self.assertEqual(words[1], 0x04)
def testOptions(self):
    """Check ``CopySubstructure`` results for option values 0, 1 and 2.

    The molecule "ICBr" is copied using the bit vector built by
    ``self.createBitVec(4, (1, 3))``.  For every option the test checks
    the returned status, the SMILES of the copied molecule, and the
    contents of the atom-order and bond-order output vectors.
    """
    mol = pybel.readstring("smi", "ICBr")
    bv = self.createBitVec(4, (1, 3))

    # One row per option value: (SMILES, atom order, bond order).
    expectations = (
        ("[I].[Br]", [1, 3], []),
        ("I.Br", [1, 3], []),
        ("I*.Br*", [1, 3, 2, 2], [0, 1]),
    )

    for option, (smiles, atoms, bonds) in enumerate(expectations):
        # Fresh output containers for every call.
        nmol = ob.OBMol()
        atomorder = ob.vectorUnsignedInt()
        bondorder = ob.vectorUnsignedInt()

        ok = mol.OBMol.CopySubstructure(
            nmol, bv, None, option, atomorder, bondorder)

        self.assertTrue(ok)
        self.assertEqual(pybel.Molecule(nmol).write("smi").rstrip(), smiles)
        self.assertEqual(atoms, list(atomorder))
        self.assertEqual(bonds, list(bondorder))
def test_get_set(self):
    """Check ``GetBit`` / ``SetBit`` on a two-word fingerprint vector.

    The vector holds the words ``1`` and ``6``.  The test expects bit 0
    to be set, bits 1..32 to be clear, bits 33 and 34 to be set, and
    bit 35 to be clear until ``SetBit`` turns it on.
    """
    bits = ob.vectorUnsignedInt([1, 6])
    # XXX Why does GetBit need an actual instance?
    fingerprinter = ob.OBFingerprint.FindFingerprint("FP2")

    self.assertTrue(fingerprinter.GetBit(bits, 0))
    for position in range(1, 32):
        # The position doubles as the failure message.
        self.assertFalse(fingerprinter.GetBit(bits, position), position)

    # Bits covered by the second word (value 6).
    self.assertFalse(fingerprinter.GetBit(bits, 32))
    self.assertTrue(fingerprinter.GetBit(bits, 33))
    self.assertTrue(fingerprinter.GetBit(bits, 34))
    self.assertFalse(fingerprinter.GetBit(bits, 35))

    # Setting a bit must be visible through GetBit afterwards.
    fingerprinter.SetBit(bits, 35)
    self.assertTrue(fingerprinter.GetBit(bits, 35))
def get_mrlogP_descriptors(self, smiles: str, moltitle: str):
    """Build the descriptor vector for one SMILES string.

    The result is a 1-D NumPy array of 128 + 128 + 60 = 316 floats,
    created full of NaN and then filled in this order:

    * ``[0:128]``   Morgan fingerprint bits (radius 2, 128 bits) computed
      by RDKit on the hydrogen-stripped molecule;
    * ``[128:256]`` Open Babel "FP4" fingerprint folded to 128 bits;
    * ``[256:316]`` the values returned by ``GetUSRCAT``.

    Args:
        smiles: SMILES string of the molecule.
        moltitle: Not used by this method.
            NOTE(review): the literal "querymol" is passed to
            ``smiles_to_3dmol`` instead -- confirm this is intended.

    Returns:
        The filled descriptor array.
    """
    n_morgan, n_fp4, n_usrcat = 128, 128, 60
    fp4_start = n_morgan
    usrcat_start = n_morgan + n_fp4

    conversion = openbabel.OBConversion()
    conversion.SetInAndOutFormats("smi", "mdl")
    ob_mol = openbabel.OBMol()

    # Build the RDKit molecule (with hydrogens) and the Open Babel one.
    rdkit_mol = Chem.AddHs(smiles_to_3dmol(smiles, "querymol"))
    # NOTE(review): the status returned by ReadString is ignored.
    conversion.ReadString(ob_mol, smiles)

    # Morgan/ECFP4 bits as a string of '0'/'1' characters.
    morgan_bitstring = AllChem.GetMorganFingerprintAsBitVect(
        Chem.RemoveHs(rdkit_mol), 2, n_morgan).ToBitString()

    # USRCAT is computed on the hydrogen-containing molecule.
    usrcat_values = [float(value) for value in GetUSRCAT(rdkit_mol)]

    # FP4, folded to 128 bits.
    fp4_words = openbabel.vectorUnsignedInt()
    fp4_engine = openbabel.OBFingerprint.FindFingerprint("FP4")
    fp4_engine.GetFingerprint(ob_mol, fp4_words)
    openbabel.OBFingerprint.Fold(fp4_engine, fp4_words, n_fp4)

    descriptors = np.full(n_morgan + n_fp4 + n_usrcat, np.nan)

    morgan_values = [float(char) for char in morgan_bitstring]
    descriptors[:len(morgan_values)] = morgan_values

    # The first four words, each rendered as a 32-character zero-padded
    # binary string (most significant bit first), concatenated in order.
    fp4_values = [
        float(bit)
        for index in range(4)
        for bit in format(fp4_words[index], '032b')
    ]
    descriptors[fp4_start:usrcat_start] = fp4_values

    descriptors[usrcat_start:usrcat_start + len(usrcat_values)] = usrcat_values

    return descriptors
def test_fp_words(self):
    """Check the size and first two words of several fingerprints.

    For each named fingerprinter the fingerprint of a fixed
    four-component molecule is generated.  The vector length must equal
    the fingerprint's bit count rounded up to a power of two (minimum 8)
    and divided by 32, and the first two words must match the recorded
    values.
    """
    mol = parse_smiles("c1ccccc1O.C#N.[Ge].C1CCC1")

    def round_up_to_power_of_two(bit_count):
        # Smallest value in 8, 16, 32, ... that is >= bit_count.
        power = 8
        while power < bit_count:
            power *= 2
        return power

    # (fingerprint name, number of bits, expected word 0, expected word 1)
    cases = (
        ("FP2", 1021, 0, 1),
        ("FP3", 55, 67108864, 1159170),
        ("FP4", 307, 2, 0),
        # TODO: change my MACCS.txt so it's correct
        # then rerun this test and change to the right answer
        ("MACCS", 166, 2097156, 256),
    )

    for name, nbits, expected0, expected1 in cases:
        fingerprinter = ob.OBFingerprint.FindFingerprint(name)
        words = ob.vectorUnsignedInt()
        fingerprinter.GetFingerprint(mol, words)

        expected_len = round_up_to_power_of_two(nbits) // 32  # bits-per-int
        self.assertEqual(len(words), expected_len)
        self.assertEqual(words[0], expected0, (name, words[0], expected0))
        self.assertEqual(words[1], expected1, (name, words[1], expected1))
def test_tanimoto_with_no_set_bits(self):
    """Check that Tanimoto of an all-zero fingerprint with itself is 0.0."""
    empty = ob.vectorUnsignedInt([0, 0, 0, 0])
    fingerprinter = ob.OBFingerprint.FindFingerprint("FP2")

    # Again, this is an arbitrary decision by toolkit providers
    similarity = fingerprinter.Tanimoto(empty, empty)
    self.assertEqual(similarity, 0.0)
def test_tanimoto_size_mismatch(self):
    """Check that Tanimoto of vectors with different lengths is -1.0."""
    two_words = ob.vectorUnsignedInt([0x1, 0x6])
    three_words = ob.vectorUnsignedInt([1, 2, 0])
    fingerprinter = ob.OBFingerprint.FindFingerprint("FP2")

    similarity = fingerprinter.Tanimoto(two_words, three_words)
    self.assertEqual(similarity, -1.0)
def test_tanimoto(self):
    """Check the Tanimoto value for two equally sized fingerprints.

    The vectors are ``[0x1, 0x6]`` and ``[0x1, 0x7]``; the expected
    similarity is (1 + 2) / (1 + 3), i.e. the per-word counts of bits in
    common divided by the per-word counts of bits set in either vector.
    """
    first = ob.vectorUnsignedInt([0x1, 0x6])
    second = ob.vectorUnsignedInt([0x1, 0x7])
    fingerprinter = ob.OBFingerprint.FindFingerprint("FP2")

    shared_bits = 1 + 2        # word 0: 1 shared, word 1: 2 shared
    combined_bits = 1 + 3      # word 0: 1 set, word 1: 3 set
    expected = shared_bits / (combined_bits + 0.0)

    self.assertEqual(fingerprinter.Tanimoto(first, second), expected)