def test_monomer_importer(json_importer, independent_importers):
    """Verify that MonomerImporter persists monomers with the expected fields.

    Imports monomers through ``importers.MonomerImporter``, reloads them from
    the monomer repository by the returned ids, and checks every stored
    record against values recomputed from its RDKit molecule and against the
    source documents provided by ``json_importer``.

    Args:
        json_importer: fixture used both to build the importer and to load
            the raw monomer documents.
        independent_importers: fixture that is not referenced in the body;
            presumably requested for its side effects (e.g. importing the
            backbones beforehand) -- TODO confirm against the fixture.
    """
    monomer_importer = importers.MonomerImporter(json_importer)
    ids = monomer_importer.import_data()

    monomer_repo = repo.create_monomer_repository()
    backbone_repo = repo.create_backbone_repository()
    monomer_data = list(monomer_repo.load(ids))
    monomer_docs = json_importer.load(monomer_importer.saver.TYPE.STRING)
    backbone_data = list(backbone_repo.load())

    kekules = [doc['kekule'] for doc in monomer_docs]
    backbones = [mol.to_reduced_dict() for mol in backbone_data]

    assert len(monomer_data) == 4

    for mol in monomer_data:
        rdkit_mol = mol.mol

        assert mol._id is not None
        # "required" must equal "has at least one aromatic ring".
        assert mol.required == bool(AllChem.CalcNumAromaticRings(rdkit_mol))
        assert mol.backbone in backbones
        assert mol.sidechain is None
        assert mol.connection is None
        # "proline" must equal "has an aliphatic ring AND matches the
        # PROLINE_N_TERM substructure pattern".
        assert mol.proline == bool(
            AllChem.CalcNumAliphaticRings(rdkit_mol)
            and rdkit_mol.HasSubstructMatch(PROLINE_N_TERM))
        # Equality (not identity) is kept on purpose: the stored type of
        # ``imported`` is not visible from this test.
        assert mol.imported == True  # noqa: E712
        assert mol.kekule in kekules
        # Consume the matched kekule so that a second stored monomer with the
        # same kekule fails the membership check above.
        kekules.remove(mol.kekule)
def featurize(aa):
    """Compute a small set of RDKit descriptors for an amino-acid sequence.

    The sequence is parsed with ``Chem.MolFromFASTA`` and explicit hydrogens
    are added before the descriptors are calculated.

    Args:
        aa: sequence string accepted by ``Chem.MolFromFASTA``.

    Returns:
        dict mapping descriptor name to value, with keys ``'MolWT'``,
        ``'LogP'``, ``'HBondDonors'``, ``'HBondAcceptors'``,
        ``'nAromaticRings'``, ``'nHeteroAtoms'`` and ``'nRotatableBonds'``.
    """
    mol = Chem.MolFromFASTA(aa)
    mol = Chem.AddHs(mol)

    descriptors = {
        'MolWT': AllChem.CalcExactMolWt(mol),
        # NOTE(review): ``Chem.Crippen`` resolves only if the
        # ``rdkit.Chem.Crippen`` submodule has been imported somewhere.
        'LogP': Chem.Crippen.MolLogP(mol),
        'HBondDonors': AllChem.CalcNumLipinskiHBD(mol),
        'HBondAcceptors': AllChem.CalcNumLipinskiHBA(mol),
        'nAromaticRings': AllChem.CalcNumAromaticRings(mol),
        'nHeteroAtoms': AllChem.CalcNumHeteroatoms(mol),
        'nRotatableBonds': AllChem.CalcNumRotatableBonds(mol),
    }

    # The dict was previously built and then dropped, so the function
    # always returned None; hand the computed features back to the caller.
    return descriptors
def PhyChem(smiles):
    """Build a scaled matrix of 19 physicochemical descriptors.

    One row is produced per input SMILES. If any descriptor calculation
    raises for a molecule, the SMILES is printed and its row is filled with
    zeros, so the output always has one row per input. The assembled matrix
    is then passed through ``Scaler().fit_transform``; the zero rows take
    part in that fit.

    Column order: molecular weight, logP, H-bond acceptors, H-bond donors,
    rotatable bonds, amide bonds, bridgehead atoms, heteroatoms, heavy atoms,
    spiro atoms, fraction of sp3 carbons, ring count, aliphatic rings,
    aromatic rings, saturated rings, heterocycles, TPSA, valence electrons,
    molar refractivity.

    Arguments:
        smiles (list): list of SMILES strings.

    Returns:
        props (ndarray): m X 19 matrix of scaled descriptors, where m is the
            number of samples.
    """
    n_descriptors = 19
    rows = []

    for smi in smiles:
        mol = Chem.MolFromSmiles(smi)
        try:
            row = [
                desc.MolWt(mol),
                Crippen.MolLogP(mol),
                Lipinski.NumHAcceptors(mol),
                Lipinski.NumHDonors(mol),
                Lipinski.NumRotatableBonds(mol),
                AllChem.CalcNumAmideBonds(mol),
                AllChem.CalcNumBridgeheadAtoms(mol),
                Lipinski.NumHeteroatoms(mol),
                Lipinski.HeavyAtomCount(mol),
                AllChem.CalcNumSpiroAtoms(mol),
                AllChem.CalcFractionCSP3(mol),
                Lipinski.RingCount(mol),
                AllChem.CalcNumAliphaticRings(mol),
                AllChem.CalcNumAromaticRings(mol),
                AllChem.CalcNumSaturatedRings(mol),
                AllChem.CalcNumHeterocycles(mol),
                MolSurf.TPSA(mol),
                desc.NumValenceElectrons(mol),
                Crippen.MolMR(mol),
            ]
        except Exception:
            # Best-effort: report the offending SMILES and keep a placeholder
            # row so the output stays aligned with the input.
            print(smi)
            row = [0] * n_descriptors
        rows.append(row)

    matrix = np.array(rows)
    return Scaler().fit_transform(matrix)
def is_required(monomer):
    """Return True when the monomer contains at least one aromatic ring.

    Args:
        monomer: mapping whose ``'kekule'`` entry is a SMILES string.

    Returns:
        bool: whether the molecule parsed from ``monomer['kekule']`` has a
        non-zero aromatic ring count.
    """
    mol = Chem.MolFromSmiles(monomer['kekule'])
    aromatic_ring_count = AllChem.CalcNumAromaticRings(mol)
    return aromatic_ring_count != 0
def is_required(mol):
    """Return True when ``mol`` contains at least one aromatic ring.

    Args:
        mol: molecule object accepted by ``AllChem.CalcNumAromaticRings``.

    Returns:
        bool: whether the aromatic ring count of ``mol`` is non-zero.
    """
    aromatic_ring_count = AllChem.CalcNumAromaticRings(mol)
    return aromatic_ring_count != 0