def encode_data(data, wildcard_encoding):
    """Build the network input array for a collection of binding records.

    Each record in *data* is unpacked as ``(peptide, pocket, measure)``.
    The peptide is pad-encoded with ``pad_encode_peptide`` and the result is
    handed to ``NNAlign.core_encoding`` together with the peptide length and
    the pocket. The measure is not used here.

    Args:
        data: Iterable of 3-item records ``(peptide, pocket, measure)``.
        wildcard_encoding: Forwarded unchanged to ``pad_encode_peptide``.

    Returns:
        ``np.array`` built from the per-record ``NNAlign.core_encoding``
        results, in the order the records were supplied.
    """
    aa_encoder = encoding.BlossumEncoder()

    # The padding table is read from the current working directory.
    # NOTE(review): pickle must only be loaded from a trusted file.
    with open('padding.pickle', 'rb') as f:
        pad_dict = pickle.load(f)

    # The trailing 9 is presumably the binding-core length expected by
    # NNAlign.core_encoding -- TODO confirm against NNAlign.
    encoded_rows = [
        NNAlign.core_encoding(
            pad_encode_peptide(peptide, pad_dict, aa_encoder, wildcard_encoding),
            len(peptide),
            pocket,
            9,
        )
        for peptide, pocket, _measure in data
    ]
    return np.array(encoded_rows)
import numpy as np
from keras.utils import plot_model
import csv
import pickle
import NNAlign
from collections import defaultdict

# Module-level setup: runs at import time and reads three data files
# (pocket_positions.pickle, mhc_mapper.csv, complete_mhc.fasta).
pg = encoding.PositionGetter('pocket_positions.pickle')
positions = pg.get_pocket_positions('A' * 9)
# Prints the label and the positions on separate lines.
print('positions', positions, sep='\n')
pseudosequences = encoding.get_pseudosequences_dict(
    'mhc_mapper.csv',
    'complete_mhc.fasta',
    positions,
)
blencoder = encoding.BlossumEncoder()


def encode_sequence(sequence):
    """Encode *sequence* element by element with the shared ``blencoder``.

    Args:
        sequence: Iterable of residues; each element is passed to
            ``blencoder.encode_aa``.

    Returns:
        ``np.array`` of the per-element encodings, in input order.
    """
    # NOTE: an earlier implementation used a sparse encoding over the 20
    # letters ACDEFGHIKLMNPQRSTVWY (0.9 at the residue's index, 0.1
    # elsewhere); it was retired in favour of the encoder-based form below.
    encoded_residues = []
    for residue in sequence:
        encoded_residues.append(blencoder.encode_aa(residue))
    return np.array(encoded_residues)