def score_peptide(peptide, gamma=5):
    """Score a single peptide against the reference peptide set.

    The peptide is converted to a feature vector with ``protein_features``
    (lags 1 through 4), laid out as a single row, normalized with the
    module-level ``normalizer`` and then handed to
    ``kernel_embedding_score``.

    Args:
        peptide: The peptide to score; forwarded unchanged to
            ``protein_features``.
            NOTE(review): expected type (string vs. SeqRecord) is not
            visible here -- confirm against ``protein_features``.
        gamma: Kernel coefficient forwarded to ``kernel_embedding_score``.
            Defaults to 5 here, whereas ``kernel_embedding_score`` itself
            defaults to 0.01.

    Returns:
        Whatever ``kernel_embedding_score`` returns for the normalized row.
    """
    lags = range(1, 5)
    raw_features = protein_features(peptide, lag_range=lags)
    feature_row = raw_features.reshape((1, -1))

    # Slice assignment writes the normalized values into the existing row
    # instead of rebinding the name, so the row keeps its original dtype.
    # NOTE(review): if reshape returned a view, this also overwrites the
    # array produced by protein_features -- confirm that is intended.
    feature_row[:] = normalizer.transform(feature_row)

    return kernel_embedding_score(feature_row, gamma)
from math import exp

import numpy as np
import pandas as pd
from Bio import SeqIO

# NOTE(review): `Normalizer` and `protein_features` are used below but are
# neither imported nor defined in the visible part of this file. Presumably
# `Normalizer` is sklearn.preprocessing.Normalizer -- confirm both are
# brought into scope elsewhere.

# Set to True to recompute the reference feature matrix from the FASTA
# sequences and overwrite the cached CSV. When False the cached CSV is reused.
_REBUILD_FEATURE_CACHE = False

print('Loading the sequences...')
peptides = list(SeqIO.parse('Data/anti_microbial_peptide.fasta', 'fasta'))

print('Loading the features...')
if _REBUILD_FEATURE_CACHE:
    features_reference = np.vstack(
        [protein_features(pep, lag_range=range(1, 5)) for pep in peptides])
    pd.DataFrame(features_reference).to_csv(
        'Data/anti_microbial_peptide_features.csv', index=False)

# The matrix is always read back from the CSV, whether or not it was just
# rebuilt above.
features_reference = pd.read_csv('Data/anti_microbial_peptide_features.csv')

# Fit the normalizer on the reference features; the same fitted object is
# reused by score_peptide to transform query peptides.
normalizer = Normalizer()
features_reference = normalizer.fit_transform(features_reference.values)


def kernel_embedding_score(peptide_feature, gamma=0.01):
    """Return the mean Gaussian-kernel similarity to the reference peptides.

    For every row ``r`` of the module-level ``features_reference`` matrix
    this evaluates ``exp(-gamma * ||peptide_feature - r||**2)`` and
    averages the values over all rows.

    Args:
        peptide_feature: Array that broadcasts against
            ``features_reference`` to a 2-D result, e.g. the single
            ``(1, n_features)`` row that ``score_peptide`` passes after
            transforming it with ``normalizer``.
        gamma: Non-negative kernel coefficient; larger values make the
            kernel decay faster with squared distance.

    Returns:
        The mean kernel value over all reference rows.
    """
    # Squared Euclidean distance from the query to each reference row.
    squared_distances = np.sum(
        (peptide_feature - features_reference) ** 2, axis=1)
    score = np.exp(-gamma * squared_distances).mean()
    # The original computed `score` but never returned it, so every caller
    # (including score_peptide) received None.
    return score