def computeIndexFeatures(aaAlphabet, peptide, index, polarAa, hydrophobicAa):
    """Return the 20 index-derived features of a peptide as a flat list.

    Feature order:
        0-4    indexSum, indexAvg, indexN, indexC, indexNearestNeighbour
        5-8    max partial sum (window 5), max partial sum (window 2),
               min partial sum (window 5), min partial sum (window 2)
        9-10   max / min hydrophobic side helix
        11-14  max hydrophobic moment (100), max hydrophobic moment (180),
               min hydrophobic moment (100), min hydrophobic moment (180)
        15     indexSumSquaredDiff
        16-19  count and consecutive count of polarAa residues, then the
               same two counts for hydrophobicAa residues

    Args:
        aaAlphabet: accepted for signature compatibility; not used here.
        peptide: peptide passed to dm.getAminoAcidList.
        index: retention index handed to every index* helper.
        polarAa: residue set passed to indexNearestNeighbour and the
            polar count helpers.
        hydrophobicAa: residue set passed to the hydrophobic count helpers.

    Returns:
        list with 20 feature values in the order given above.
    """
    residues = dm.getAminoAcidList(peptide)

    values = [
        indexSum(residues, index),
        indexAvg(residues, index),
        indexN(residues, index),
        indexC(residues, index),
        indexNearestNeighbour(residues, index, polarAa),
    ]

    # Partial sums: both maxima are emitted before both minima.
    hiSum5, loSum5 = indexMaxMinPartialSum(residues, index, 5)
    hiSum2, loSum2 = indexMaxMinPartialSum(residues, index, 2)
    values += [hiSum5, hiSum2, loSum5, loSum2]

    hiHelix, loHelix = indexMaxMinHydrophobicSideHelix(residues, index)
    values += [hiHelix, loHelix]

    # Hydrophobic moments use the same max-first ordering as the partial sums.
    hiMoment100, loMoment100 = indexMaxMinHydrophobicMoment(residues, index, 100, 11)
    hiMoment180, loMoment180 = indexMaxMinHydrophobicMoment(residues, index, 180, 11)
    values += [hiMoment100, hiMoment180, loMoment100, loMoment180]

    values.append(indexSumSquaredDiff(residues, index))
    for aaGroup in (polarAa, hydrophobicAa):
        values.append(numberTypeAA(residues, aaGroup))
        values.append(numberConsecTypeAA(residues, aaGroup))
    return values
# Example #2
# 0
def computeRetentionFeatureMatrix(aaAlphabet, psmDescriptions, customIndex):
  """Build the retention feature matrix with one row per PSM description.

  Column groups, in order:
    1. 20 index features computed with kyteDoolittleIndex
       (skipped when hasPtms(aaAlphabet) is true)
    2. 20 index features computed with customIndex
    3. indexSum over bulkinessIndex
       (skipped when hasPtms(aaAlphabet) is true)
    4. number of amino acids in the peptide
    5. the columns returned by computeRetentionIndexFeatureMatrix

  Args:
    aaAlphabet: amino acid alphabet, forwarded to the feature helpers.
    psmDescriptions: sequence of objects exposing a ``peptide`` attribute.
    customIndex: retention index used for the second group of features.

  Returns:
    2-D numpy array with len(psmDescriptions) rows, after
    normalizeFeatures has been applied to it.
  """
  numPsms = len(psmDescriptions)
  ptmsPresent = hasPtms(aaAlphabet)

  def indexFeatureBlock(index):
    # One row of 20 index features per PSM for the given retention index.
    polarAa, hydrophobicAa = getExtremeRetentionAA(index)
    block = np.zeros((numPsms, 20))
    for i, psmd in enumerate(psmDescriptions):
      block[i] = computeIndexFeatures(aaAlphabet, psmd.peptide, index, polarAa, hydrophobicAa)
    return block

  # Collect the column groups and concatenate once at the end, instead of
  # re-copying the growing matrix after every group.
  blocks = []
  if not ptmsPresent:
    blocks.append(indexFeatureBlock(kyteDoolittleIndex))
  blocks.append(indexFeatureBlock(customIndex))

  # Parse each peptide once; both the bulkiness and length columns need it.
  aaLists = [dm.getAminoAcidList(psmd.peptide) for psmd in psmDescriptions]

  if not ptmsPresent:
    bulkinessFeatureVector = np.zeros((numPsms, 1))
    bulkinessFeatureVector[:, 0] = [indexSum(aas, bulkinessIndex) for aas in aaLists]
    blocks.append(bulkinessFeatureVector)

  lengthFeatureVector = np.zeros((numPsms, 1))
  lengthFeatureVector[:, 0] = [len(aas) for aas in aaLists]
  blocks.append(lengthFeatureVector)

  blocks.append(computeRetentionIndexFeatureMatrix(aaAlphabet, psmDescriptions))

  featureMatrix = np.concatenate(blocks, axis = 1)
  # Return value is ignored, so normalizeFeatures presumably works in place
  # (TODO confirm against its definition).
  normalizeFeatures(featureMatrix)
  return featureMatrix
# Example #3
# 0
def getBOWfeature(file,num):
    """Load training PSMs from ``file`` and compute their BOW feature.

    A retention index is built from the training data, each peptide is
    summarised by its bowindexSum under that index and by the length of
    its ``peptide`` attribute, and the results are handed to BOW.

    Args:
        file: training data source passed to el.processTrainData.
        num: forwarded unchanged as the last argument of BOW.

    Returns:
        Tuple ``(psmDescriptions, bow_feature)``.
    """
    psmDescriptions, aaAlphabet = el.processTrainData(file)
    retentionValues = rm.buildRetentionIndex(aaAlphabet, psmDescriptions, True)
    customIndex = dict(zip(aaAlphabet, retentionValues))

    # NOTE(review): the length is taken from the raw ``peptide`` attribute,
    # not from the parsed amino acid list - confirm this is intended.
    perPsm = [
        (len(psmd.peptide), bowindexSum(dm.getAminoAcidList(psmd.peptide), customIndex))
        for psmd in psmDescriptions
    ]
    peptide_length = [length for length, _ in perPsm]
    hydrophobicity = [score for _, score in perPsm]

    # Only the first element of the extracted pair is used below.
    peptide, _unused = dg.extract(psmDescriptions, len(psmDescriptions))
    bow_feature = BOW(peptide, hydrophobicity, peptide_length, aaAlphabet, num)
    return psmDescriptions, bow_feature
def computeRetentionFeatureVector(aaAlphabet, peptide, customIndex):
    """Build the retention feature vector for a single peptide.

    Column groups, in order:
        1. 20 index features computed with kyteDoolittleIndex
           (skipped when hasPtms(aaAlphabet) is true)
        2. 20 index features computed with customIndex
        3. indexSum over bulkinessIndex
           (skipped when hasPtms(aaAlphabet) is true)
        4. number of amino acids in the peptide
        5. per-residue counts from computeRetentionIndexFeatures,
           one column per entry of aaAlphabet

    Args:
        aaAlphabet: amino acid alphabet, forwarded to the feature helpers.
        peptide: the peptide to featurize.
        customIndex: retention index used for the second group of features.

    Returns:
        1-D numpy array holding the concatenated features after
        normalizeFeatures has been applied.
    """
    ptmsPresent = hasPtms(aaAlphabet)

    def indexFeatureBlock(index):
        # (1, 20) block of index features for the given retention index.
        polarAa, hydrophobicAa = getExtremeRetentionAA(index)
        block = np.zeros((1, 20))
        block[:] = computeIndexFeatures(aaAlphabet, peptide, index, polarAa, hydrophobicAa)
        return block

    blocks = []
    if not ptmsPresent:
        blocks.append(indexFeatureBlock(kyteDoolittleIndex))
    blocks.append(indexFeatureBlock(customIndex))

    # Parse once; both the bulkiness and the length feature need the list.
    aas = dm.getAminoAcidList(peptide)

    if not ptmsPresent:
        bulkinessFeatureVector = np.zeros((1, 1))
        bulkinessFeatureVector[:] = indexSum(aas, bulkinessIndex)
        blocks.append(bulkinessFeatureVector)

    lengthFeatureVector = np.zeros((1, 1))
    lengthFeatureVector[:] = len(aas)
    blocks.append(lengthFeatureVector)

    # Use the per-peptide counter: it returns a (1, len(aaAlphabet)) block
    # that lines up with the single-row blocks above. The matrix variant
    # is called with a list of PSM descriptions elsewhere in this file,
    # not with one peptide.
    blocks.append(computeRetentionIndexFeatures(aaAlphabet, peptide))

    featureVector = np.concatenate(blocks, axis=1)

    # Return value is ignored, so normalizeFeatures presumably works in place.
    # NOTE(review): confirm that normalizing a one-row input is meaningful.
    normalizeFeatures(featureVector)
    return np.array(featureVector[0])
def computeRetentionIndexFeatures(aaAlphabet, peptide):
    """Count how often each alphabet entry occurs in a peptide.

    Args:
        aaAlphabet: sequence of amino acid symbols; the position of a
            symbol determines its column.
        peptide: peptide passed to dm.getAminoAcidList.

    Returns:
        numpy array of shape (1, len(aaAlphabet)) holding the counts.

    Raises:
        ValueError: if the peptide contains a residue missing from
            aaAlphabet (raised by ``aaAlphabet.index`` when it is a list).
    """
    residues = dm.getAminoAcidList(peptide)
    counts = np.zeros((1, len(aaAlphabet)))
    row = counts[0]  # view into the single row, so updates land in counts
    for residue in residues:
        column = aaAlphabet.index(residue)
        row[column] += 1
    return counts