def constructPattern(contactsTrainDict, chemTrainDict, prefix, sType=3):
    """Build an averaged per-bit pattern from the molecules of one class.

    Only entries of ``contactsTrainDict`` whose name starts with ``prefix``
    (compared against the first three characters of the name) take part.

    Args:
        contactsTrainDict: mapping of molecule name -> bit vector. The
            vector must support ``len()``, integer indexing and
            ``GetOnBits()``.
        chemTrainDict: mapping of molecule name -> the value handed to
            ``getCompoundToSetSimilarity`` for that molecule.
        prefix: three-character class prefix selecting the molecules.
        sType: scoring scheme applied to every molecule after the first:
            1 -> each set bit adds 1;
            2 -> each set bit adds ``1 - averageFromMolSimilDict(...)``;
            anything else -> each set bit adds ``1 - maxFromMolSimilDict(...)``.
            In schemes 2 and 3 the similarity is computed against the
            molecules that have already contributed to that bit.

    Returns:
        A list with one float per bit (accumulated score divided by the
        number of matching molecules), or ``[]`` if no name matched.

    Note:
        For schemes 2 and 3 the result depends on the iteration order of
        ``contactsTrainDict``, because each molecule is scored against the
        ones processed before it.
    """
    pattern = []
    # contributed[bit] maps molName -> chemTrainDict[molName] for every
    # molecule that has had this bit set so far.
    contributed = []
    total = 0

    for molName, vector in contactsTrainDict.items():
        if molName[:3] != prefix:
            continue
        total += 1

        if not pattern:
            # First matching molecule: it fixes the pattern length and its
            # set bits always count with full weight.
            for bit in range(len(vector)):
                contributed.append({})
                if vector[bit]:
                    pattern.append(1)
                    contributed[bit][molName] = chemTrainDict[molName]
                else:
                    pattern.append(0)
            continue

        for bit in vector.GetOnBits():
            if sType == 1:
                weight = 1
            else:
                # NOTE(review): contributed[bit] can still be empty here when
                # no earlier molecule set this bit; confirm the helpers
                # handle an empty set.
                similarities = getCompoundToSetSimilarity(
                    chemTrainDict[molName], contributed[bit])
                if sType == 2:
                    weight = 1 - averageFromMolSimilDict(similarities)
                else:
                    weight = 1 - maxFromMolSimilDict(similarities)
            pattern[bit] += weight
            contributed[bit][molName] = chemTrainDict[molName]

    if total == 0:
        return []
    # "* 1." forces float division regardless of interpreter version.
    return [score * 1. / total for score in pattern]
def getBestSimilarToMolset(vector, molsDict, longNames=None):
    """Rank the molecules of ``molsDict`` by similarity to ``vector``.

    Args:
        vector: query passed as the first argument to
            ``getCompoundToSetSimilarity``.
        molsDict: molecule set passed as the second argument to
            ``getCompoundToSetSimilarity``.
        longNames: optional mapping of molecule name -> display name.
            Names missing from the mapping (or every name, when the
            argument is omitted) get an empty ``"longname"``.

    Returns:
        A list of dicts ordered by decreasing similarity, each with:
            "id":         the molecule name from its fifth character on,
            "type":       "inhibitor" if the first three characters of the
                          name equal "yes" (case-insensitive), otherwise
                          "notinhibitor",
            "longname":   the display name looked up in ``longNames``,
            "similarity": the similarity times 100, truncated to an int.
    """
    # A None default avoids sharing one dict object between calls.
    if longNames is None:
        longNames = {}

    resDict = getCompoundToSetSimilarity(vector, molsDict)
    ranked = sorted(resDict.keys(), key=resDict.get, reverse=True)

    return [
        {
            "id": name[4:],
            "type": "inhibitor" if name[:3].lower() == 'yes' else "notinhibitor",
            # .get keeps the default (no long names) usable; indexing raised
            # KeyError for every molecule in that case.
            "longname": longNames.get(name, ""),
            "similarity": int(resDict[name] * 100),
        }
        for name in ranked
    ]