def compareDistances(actives, decoys):
    """Average the per-decoy distances between a decoy set and an active set.

    Fingerprints for both collections are taken from the first element of
    ``utilities.getFingerprintList``. Each decoy fingerprint is then compared
    against the whole set of active fingerprints with
    ``utilities.getMolDistFromSet``.

    NOTE(review): element 0 / element 1 of the ``getMolDistFromSet`` result
    are presumably the minimum / maximum distance to the set -- confirm
    against the helper's implementation.

    Returns:
        A 2-tuple ``(mean of element 0, mean of element 1)`` computed with
        ``numpy.mean`` over all decoys.
    """
    reference_fps = utilities.getFingerprintList(actives)[0]
    query_fps = utilities.getFingerprintList(decoys)[0]

    # One helper call per decoy; both summary values come from that result.
    per_decoy = [
        utilities.getMolDistFromSet(query_fp, reference_fps)
        for query_fp in query_fps
    ]
    nearest = [result[0] for result in per_decoy]
    farthest = [result[1] for result in per_decoy]

    return numpy.mean(nearest), numpy.mean(farthest)
def predict(classmodel, regressmodel, molfile_path):
    """Report which molecules in an SD file are predicted to be active.

    Every readable molecule in ``molfile_path`` is wrapped in a
    ``PropertyMol`` and keyed by its ``_Name`` property, then passed to
    ``fingerprinter.appendMorganFingerprints`` (which must leave a
    ``'fingerprint'`` entry on each molecule's dict, since that key is read
    below). A molecule is reported as active when the classifier's first
    prediction is truthy AND its distance to the stored actives is within
    ``APPLICABILITY_DOMAIN_DISTANCE_THRESHOLD``. For those molecules the
    regression model's first prediction is printed as the pIC50.

    Args:
        classmodel: object with ``predict(fingerprint)``; element 0 of the
            result is used as the active / inactive flag.
        regressmodel: object with ``predict(fingerprint)``; element 0 of the
            result is printed as the predicted pIC50.
        molfile_path: path of the SD file to read.

    Returns:
        None. All results are written to standard output.
    """
    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print("Starting predictions for: " + molfile_path)

    mols = dict()
    for index, mol in enumerate(Chem.SDMolSupplier(molfile_path)):
        # SDMolSupplier yields None for records it cannot parse; wrapping
        # None in a PropertyMol would abort the whole run.
        if mol is None:
            print("Skipping unreadable record #" + str(index) + " in " + molfile_path)
            continue
        pmol = PropertyMol.PropertyMol(mol)
        mols[pmol.GetProp("_Name")] = {"RDKit": pmol}
    fingerprinter.appendMorganFingerprints(mols, dump=None)

    # NOTE(security): pickle can execute arbitrary code while loading, so
    # ACTIVES_DUMP must only ever point at a trusted, locally produced file.
    with open(ACTIVES_DUMP, 'rb') as actives_file:
        actives = pickle.load(actives_file)

    # The reference fingerprints do not depend on the molecule being tested,
    # so they are built once instead of once per loop iteration.
    fingerprints_actives = utilities.getFingerprintList(actives)[0]

    found_sth = False
    for mol in mols:
        fingerprint = mols[mol]['fingerprint']
        prediction = classmodel.predict(fingerprint)
        # NOTE(review): element 0 is presumably the minimum distance to the
        # set of actives -- confirm against utilities.getMolDistFromSet.
        min_distance = utilities.getMolDistFromSet(fingerprint, fingerprints_actives)[0]
        if min_distance <= APPLICABILITY_DOMAIN_DISTANCE_THRESHOLD and prediction[0]:
            print(mol + " is active")
            print("Predicted pIC50: " + str(regressmodel.predict(fingerprint)[0]))
            found_sth = True

    if not found_sth:
        print("None of the molecules within the specified set was found to be active.")