def score_peptide(peptide, gamma=5):
    """Score one peptide against the module-level reference features.

    The peptide is turned into features by ``protein_features`` (lags 1
    through 4), reshaped to a single row, run through the module-level
    ``normalizer`` and handed to ``kernel_embedding_score``.

    Args:
        peptide: Passed unchanged to ``protein_features``.
        gamma: Forwarded as the second argument of
            ``kernel_embedding_score``. Defaults to 5.

    Returns:
        Whatever ``kernel_embedding_score`` returns for the normalised row.
    """
    raw_features = protein_features(peptide, lag_range=range(1, 5))
    feature_row = raw_features.reshape((1, -1))
    # Slice assignment writes the normalised values into the existing row
    # instead of rebinding the name to the array returned by transform().
    feature_row[:] = normalizer.transform(feature_row)
    return kernel_embedding_score(feature_row, gamma)
import pandas as pd
import numpy as np
from Bio import SeqIO
from math import exp

print('Loading the sequences...')

# SeqIO.parse returns an iterator over the records of the FASTA file;
# list() drains it in one pass and keeps every record in memory.
peptides = list(SeqIO.parse('Data/anti_microbial_peptide.fasta', 'fasta'))

print('Loading the features...')

# This branch never runs (`if False`). When enabled it computes one feature
# row per peptide with lags 1 through 4, stacks the rows and writes them to
# the CSV file that is read back just below.
# NOTE(review): protein_features is not defined in the visible part of this
# file -- confirm it is in scope before enabling this branch.
if False:
    features_reference = np.vstack(
        [protein_features(pep, lag_range=range(1, 5)) for pep in peptides])
    pd.DataFrame(features_reference).to_csv(
        'Data/anti_microbial_peptide_features.csv', index=False)

# The reference features always come from the CSV file on disk.
features_reference = pd.read_csv('Data/anti_microbial_peptide_features.csv')

# Fit the normalizer on the reference features and keep it at module level:
# score_peptide() reuses the same instance to transform new peptides.
# NOTE(review): Normalizer is not imported in the visible part of this file
# -- presumably sklearn.preprocessing.Normalizer; confirm.
normalizer = Normalizer()
features_reference = normalizer.fit_transform(features_reference.values)


def kernel_embedding_score(peptide_feature, gamma=0.01):
    """
    Calculates the score for kernel embedding
    """
    score = np.exp(-gamma * np.sum(
        (peptide_feature - features_reference)**2, 1)).mean()