def computeIndexFeatures(aaAlphabet, peptide, index, polarAa, hydrophobicAa):
    """Return the 20 index-derived features of one peptide as a flat list.

    The peptide is split into residues with ``dm.getAminoAcidList`` and every
    feature is produced by one of the module's ``index*`` / ``number*``
    helpers. ``aaAlphabet`` is accepted for signature compatibility but is
    not read here.

    Feature order (fixed, callers allocate 20 columns for it):
      0-4   indexSum, indexAvg, indexN, indexC, indexNearestNeighbour
      5-8   partial sums: max(window 5), max(window 2), min(5), min(2)
      9-10  hydrophobic side helix: max, min
      11-14 hydrophobic moment: max(100), max(180), min(100), min(180)
      15    indexSumSquaredDiff
      16-17 count / consecutive count of ``polarAa`` residues
      18-19 count / consecutive count of ``hydrophobicAa`` residues
    """
    residues = dm.getAminoAcidList(peptide)

    # Plain scalar descriptors; list literals evaluate left to right, so the
    # helper call order matches the feature order.
    scalarPart = [
        indexSum(residues, index),
        indexAvg(residues, index),
        indexN(residues, index),
        indexC(residues, index),
        indexNearestNeighbour(residues, index, polarAa),
    ]

    # Helpers returning (max, min) pairs.
    hiSum5, loSum5 = indexMaxMinPartialSum(residues, index, 5)
    hiSum2, loSum2 = indexMaxMinPartialSum(residues, index, 2)
    hiHelix, loHelix = indexMaxMinHydrophobicSideHelix(residues, index)
    hiMoment100, loMoment100 = indexMaxMinHydrophobicMoment(residues, index, 100, 11)
    hiMoment180, loMoment180 = indexMaxMinHydrophobicMoment(residues, index, 180, 11)

    # Maxima are emitted before minima within each pair group.
    pairedPart = [
        hiSum5, hiSum2, loSum5, loSum2,
        hiHelix, loHelix,
        hiMoment100, hiMoment180, loMoment100, loMoment180,
    ]

    compositionPart = [
        indexSumSquaredDiff(residues, index),
        numberTypeAA(residues, polarAa),
        numberConsecTypeAA(residues, polarAa),
        numberTypeAA(residues, hydrophobicAa),
        numberConsecTypeAA(residues, hydrophobicAa),
    ]

    return scalarPart + pairedPart + compositionPart
def computeRetentionFeatureMatrix(aaAlphabet, psmDescriptions, customIndex):
    """Build the retention feature matrix, one row per PSM description.

    Column blocks, in order:
      * 20 ``computeIndexFeatures`` columns for ``kyteDoolittleIndex``
        (only when ``hasPtms(aaAlphabet)`` is falsy)
      * 20 ``computeIndexFeatures`` columns for ``customIndex``
      * 1 bulkiness column, ``indexSum`` over ``bulkinessIndex``
        (only when ``hasPtms(aaAlphabet)`` is falsy)
      * 1 peptide-length column (number of residues)
      * the columns returned by ``computeRetentionIndexFeatureMatrix``

    Args:
      aaAlphabet: amino-acid alphabet passed through to the helpers.
      psmDescriptions: sized iterable of objects exposing ``.peptide``.
      customIndex: retention index used for the second feature block.

    Returns:
      2-D numpy array with ``len(psmDescriptions)`` rows, after
      ``normalizeFeatures`` has been applied to it.
    """
    numRows = len(psmDescriptions)
    numIndexFeatures = 20  # length of the list computeIndexFeatures returns
    ptmsPresent = bool(hasPtms(aaAlphabet))

    # Blocks are collected and concatenated once at the end instead of
    # re-copying the growing matrix after every block.
    blocks = []

    if not ptmsPresent:
        polarAa, hydrophobicAa = getExtremeRetentionAA(kyteDoolittleIndex)
        kyteDoolittleFeatureMatrix = np.zeros((numRows, numIndexFeatures))
        for i, psmd in enumerate(psmDescriptions):
            kyteDoolittleFeatureMatrix[i] = computeIndexFeatures(
                aaAlphabet, psmd.peptide, kyteDoolittleIndex, polarAa, hydrophobicAa
            )
        blocks.append(kyteDoolittleFeatureMatrix)

    polarAa, hydrophobicAa = getExtremeRetentionAA(customIndex)
    customFeatureMatrix = np.zeros((numRows, numIndexFeatures))
    for i, psmd in enumerate(psmDescriptions):
        customFeatureMatrix[i] = computeIndexFeatures(
            aaAlphabet, psmd.peptide, customIndex, polarAa, hydrophobicAa
        )
    blocks.append(customFeatureMatrix)

    # Parse every peptide once; both the bulkiness and the length columns
    # are derived from the same residue lists.
    aaLists = [dm.getAminoAcidList(psmd.peptide) for psmd in psmDescriptions]

    if not ptmsPresent:
        bulkinessFeatureVector = np.zeros((numRows, 1))
        for i, aas in enumerate(aaLists):
            bulkinessFeatureVector[i] = indexSum(aas, bulkinessIndex)
        blocks.append(bulkinessFeatureVector)

    lengthFeatureVector = np.zeros((numRows, 1))
    for i, aas in enumerate(aaLists):
        lengthFeatureVector[i] = len(aas)
    blocks.append(lengthFeatureVector)

    blocks.append(computeRetentionIndexFeatureMatrix(aaAlphabet, psmDescriptions))

    featureMatrix = np.concatenate(blocks, axis=1)

    # NOTE(review): the return value of normalizeFeatures is ignored, so it
    # is presumably an in-place operation -- confirm against its definition.
    normalizeFeatures(featureMatrix)
    return featureMatrix
def getBOWfeature(file, num):
    """Load training PSMs from ``file`` and compute their ``BOW`` feature.

    Steps: read the PSM descriptions and alphabet with
    ``el.processTrainData``, build a retention index with
    ``rm.buildRetentionIndex`` and map it onto the alphabet, derive one
    ``bowindexSum`` value and one peptide-string length per PSM, then hand
    everything to ``BOW`` together with ``num``.

    Returns:
      Tuple ``(psmDescriptions, bow_feature)``.
    """
    psmDescriptions, aaAlphabet = el.processTrainData(file)

    # Pair each alphabet entry with its retention index value.
    indexValues = rm.buildRetentionIndex(aaAlphabet, psmDescriptions, True)
    customIndex = dict(zip(aaAlphabet, indexValues))

    hydrophobicity = [
        bowindexSum(dm.getAminoAcidList(entry.peptide), customIndex)
        for entry in psmDescriptions
    ]
    # Length of the raw peptide string, not of the parsed residue list.
    peptide_length = [len(entry.peptide) for entry in psmDescriptions]

    # The second value returned by dg.extract is not needed here.
    peptide, _ = dg.extract(psmDescriptions, len(psmDescriptions))

    return psmDescriptions, BOW(peptide, hydrophobicity, peptide_length, aaAlphabet, num)
def computeRetentionFeatureVector(aaAlphabet, peptide, customIndex):
    """Build the retention feature vector for a single peptide.

    Single-peptide counterpart of ``computeRetentionFeatureMatrix``; the
    column blocks are, in order:
      * 20 ``computeIndexFeatures`` values for ``kyteDoolittleIndex``
        (only when ``hasPtms(aaAlphabet)`` is falsy)
      * 20 ``computeIndexFeatures`` values for ``customIndex``
      * 1 bulkiness value, ``indexSum`` over ``bulkinessIndex``
        (only when ``hasPtms(aaAlphabet)`` is falsy)
      * 1 peptide-length value (number of residues)
      * ``len(aaAlphabet)`` residue counts from
        ``computeRetentionIndexFeatures``

    Args:
      aaAlphabet: amino-acid alphabet passed through to the helpers.
      peptide: the peptide to featurize.
      customIndex: retention index used for the second feature block.

    Returns:
      1-D numpy array holding the feature row after ``normalizeFeatures``
      has been applied to it.
    """
    numIndexFeatures = 20  # length of the list computeIndexFeatures returns
    ptmsPresent = bool(hasPtms(aaAlphabet))

    # Every block is a (1, k) row so they can be joined along axis 1.
    blocks = []

    if not ptmsPresent:
        polarAa, hydrophobicAa = getExtremeRetentionAA(kyteDoolittleIndex)
        kyteDoolittleFeatureVector = np.zeros((1, numIndexFeatures))
        kyteDoolittleFeatureVector[:] = computeIndexFeatures(
            aaAlphabet, peptide, kyteDoolittleIndex, polarAa, hydrophobicAa
        )
        blocks.append(kyteDoolittleFeatureVector)

    polarAa, hydrophobicAa = getExtremeRetentionAA(customIndex)
    customFeatureVector = np.zeros((1, numIndexFeatures))
    customFeatureVector[:] = computeIndexFeatures(
        aaAlphabet, peptide, customIndex, polarAa, hydrophobicAa
    )
    blocks.append(customFeatureVector)

    # Parse the peptide once; bulkiness and length share the residue list.
    aas = dm.getAminoAcidList(peptide)

    if not ptmsPresent:
        bulkinessFeatureVector = np.zeros((1, 1))
        bulkinessFeatureVector[:] = indexSum(aas, bulkinessIndex)
        blocks.append(bulkinessFeatureVector)

    lengthFeatureVector = np.zeros((1, 1))
    lengthFeatureVector[:] = len(aas)
    blocks.append(lengthFeatureVector)

    # Use the single-peptide helper: it returns a (1, len(aaAlphabet)) row.
    # The previous call went to computeRetentionIndexFeatureMatrix, which the
    # batch function feeds with a list of PSM descriptions, not one peptide.
    blocks.append(computeRetentionIndexFeatures(aaAlphabet, peptide))

    featureVector = np.concatenate(blocks, axis=1)

    # NOTE(review): the return value of normalizeFeatures is ignored, so it
    # is presumably an in-place operation -- confirm, and confirm that it
    # behaves sensibly for a one-row input.
    normalizeFeatures(featureVector)
    return np.array(featureVector[0])
def computeRetentionIndexFeatures(aaAlphabet, peptide):
    """Count how often each alphabet entry occurs in ``peptide``.

    Returns:
      Numpy array of shape ``(1, len(aaAlphabet))``; column ``j`` holds the
      number of residues equal to ``aaAlphabet[j]``.

    Raises:
      ValueError: propagated from ``aaAlphabet.index`` when a residue is
        missing from the alphabet (assuming ``aaAlphabet`` is a list).
    """
    counts = np.zeros((1, len(aaAlphabet)))
    for residue in dm.getAminoAcidList(peptide):
        column = aaAlphabet.index(residue)
        counts[0, column] += 1
    return counts