예제 #1
0
def encode_data(data, wildcard_encoding):
    """Build the model input array for a collection of binding records.

    Each record in ``data`` is unpacked as ``(peptide, pocket, measure)``.
    The peptide is encoded with ``pad_encode_peptide`` and the result is
    handed to ``NNAlign.core_encoding`` together with the peptide length
    and the pocket. The third element of each record is not used here.

    Args:
        data: Iterable of 3-element records ``(peptide, pocket, measure)``.
        wildcard_encoding: Forwarded unchanged to ``pad_encode_peptide``.

    Returns:
        ``np.array`` built from the per-record ``NNAlign.core_encoding``
        results, in the same order as ``data``.
    """
    blosum = encoding.BlossumEncoder()

    # The path is relative, so it is resolved against the current working
    # directory. NOTE(review): pickle.load executes arbitrary code from the
    # file -- only use with a trusted 'padding.pickle'.
    with open('padding.pickle', 'rb') as handle:
        padding = pickle.load(handle)

    # The literal 9 is passed straight through to core_encoding;
    # presumably the binding-core length -- TODO confirm against NNAlign.
    return np.array([
        NNAlign.core_encoding(
            pad_encode_peptide(peptide, padding, blosum, wildcard_encoding),
            len(peptide),
            pocket,
            9,
        )
        for peptide, pocket, _measure in data
    ])
예제 #2
0
import numpy as np
from keras.utils import plot_model
import csv
import pickle
import NNAlign
from collections import defaultdict

# Module-level setup: runs at import time, reads data files and prints to stdout.
# NOTE(review): `encoding` is not imported in the visible import block --
# confirm it is brought into scope elsewhere in the file.

# Position lookup helper backed by 'pocket_positions.pickle'.
pg = encoding.PositionGetter('pocket_positions.pickle')
# Query pocket positions using a string of nine 'A' characters
# (presumably a placeholder 9-mer peptide -- confirm intent).
positions = pg.get_pocket_positions('A' * 9)
# Debug output of the positions that were selected.
print('positions')
print(positions)

# Pseudosequence lookup built from the mapper CSV and the FASTA file,
# restricted by the `positions` computed above.
pseudosequences = encoding.get_pseudosequences_dict('mhc_mapper.csv',
                                                    'complete_mhc.fasta',
                                                    positions)
# Shared encoder instance; encode_sequence() below reads this global.
blencoder = encoding.BlossumEncoder()


def encode_sequence(sequence):
    """Encode every element of ``sequence`` with the shared ``blencoder``.

    Each element is passed to ``blencoder.encode_aa`` and the per-element
    results are collected, in order, into a NumPy array.

    Args:
        sequence: Iterable of residues (e.g. a string of one-letter codes).

    Returns:
        ``np.array`` of the per-element encodings; an empty array when
        ``sequence`` is empty.
    """
    # An earlier variant used a soft one-hot encoding over a 20-letter
    # alphabet (0.9 at the matching letter, 0.1 elsewhere); it was
    # disabled in favour of the encoder-based version below.
    encoded_residues = [blencoder.encode_aa(residue) for residue in sequence]
    return np.array(encoded_residues)