def make_prediction(peptide, allele_sequence, model=None):
    '''
    Predict a single value for one peptide / MHC allele sequence pair.

    The peptide is passed through ``format_peptide`` and index-encoded with
    ``padded_indices``; per the original notes this may yield several encoded
    rows when the peptide is longer than 9 residues. The encoded allele
    sequence is repeated once per peptide row, both arrays are fed to
    ``model.predict`` and the mean of its ``'output'`` entry is returned.

    Parameters
    ----------
    peptide : peptide to score (handed to ``format_peptide`` as-is).
    allele_sequence : MHC sequence of the allele (encoded as a single row).
    model : object exposing ``predict`` that accepts a dict with the keys
        ``'peptide'`` and ``'mhc'`` and returns a mapping containing
        ``'output'``. Optional.

    Returns
    -------
    ``np.mean`` of the model output, or the integer ``0`` when ``model`` is
    falsy (e.g. left at ``None``).
    '''
    # Both inputs are encoded with the same settings: no start/end symbols.
    encoding_options = dict(add_start_symbol=False,
                            add_end_symbol=False,
                            index_dict=amino_acid_letter_indices)

    encoded_mhc = padded_indices([allele_sequence], **encoding_options)
    encoded_peptides = padded_indices(format_peptide(peptide),
                                      **encoding_options)

    # One copy of the MHC encoding per encoded peptide row, so the two model
    # inputs have the same number of rows.
    mhc_per_peptide = np.tile(encoded_mhc, (len(encoded_peptides), 1))

    # The inputs are encoded before this check, so malformed input still
    # raises even when no model is supplied.
    if not model:
        return 0

    outputs = model.predict({'peptide': encoded_peptides,
                             'mhc': mhc_per_peptide})
    # Average over all rows (several rows for longer peptides).
    return np.mean(outputs['output'])
def get_model_data(allele_list,
                   allele_sequence_data,
                   allele_binding_data,
                   peptide_length=9,
                   mhc_length=None,
                   ):
    '''
    Generate shuffled training data for a list of alleles.

    For every allele the peptides are index-encoded, the allele's MHC
    sequence is index-encoded and repeated once per peptide, and the binding
    values are copied through unchanged (they are expected to be
    log-transformed ic50 values already; no transform is applied here).
    All rows are then shuffled with one common random permutation.

    Parameters
    ----------
    allele_list : iterable of allele keys. It is materialised once, so
        one-shot iterables are supported.
    allele_sequence_data : mapping allele -> MHC sequence.
    allele_binding_data : mapping allele -> indexable where item ``0`` holds
        the binding values and item ``1`` the peptides of that allele.
    peptide_length : number of columns of the peptide array. The encoded
        peptides must fit this width.
    mhc_length : number of columns of the MHC array. When ``None`` it is
        inferred from the encoded MHC sequence of the first allele
        (previously ``None`` was passed straight to ``np.zeros`` and raised
        ``TypeError``).

    Returns
    -------
    (X_p, X_mhc, Y) : float arrays of shape ``(n, peptide_length)``,
        ``(n, mhc_length)`` and ``(n,)`` where ``n`` is the total number of
        peptides, with rows shuffled consistently.

    Raises
    ------
    ValueError
        If ``mhc_length`` is ``None`` and ``allele_list`` is empty, so that
        no width can be inferred.
    '''
    # Iterated more than once below; a generator would otherwise be
    # exhausted after the first pass and silently produce all-zero rows.
    alleles = list(allele_list)

    encoding_options = dict(add_start_symbol=False,
                            add_end_symbol=False,
                            index_dict=amino_acid_letter_indices)

    # Encode each allele's MHC sequence once, up front.
    encoded_mhc = [padded_indices([allele_sequence_data[allele]],
                                  **encoding_options)
                   for allele in alleles]

    if mhc_length is None:
        if not encoded_mhc:
            raise ValueError(
                "mhc_length must be given when allele_list is empty")
        # NOTE(review): assumes the last axis of the encoded MHC is the
        # sequence axis and that all alleles encode to the same width --
        # confirm against padded_indices.
        mhc_length = np.shape(encoded_mhc[0])[-1]

    data_len = sum(len(allele_binding_data[allele][1]) for allele in alleles)

    X_p = np.zeros((data_len, peptide_length))
    X_mhc = np.zeros((data_len, mhc_length))
    Y_combined = np.zeros(data_len)

    index = 0
    for allele, mhc_seq in zip(alleles, encoded_mhc):
        binding_entry = allele_binding_data[allele]
        peptides = binding_entry[1]
        end_index = index + len(peptides)

        # index encoding for peptides
        X_p[index:end_index] = padded_indices(peptides, **encoding_options)

        # same MHC encoding repeated for every peptide of this allele
        X_mhc[index:end_index] = np.tile(mhc_seq, (len(peptides), 1))

        # log-transformed binding values (copied as provided)
        Y_combined[index:end_index] = binding_entry[0]

        index = end_index

    # One permutation applied to all three arrays keeps rows aligned.
    arr = np.arange(len(X_p))
    np.random.shuffle(arr)

    X_p_shuffled = X_p[arr]
    X_mhc_shuffled = X_mhc[arr]
    log_binding_values_shuffled = Y_combined[arr]

    return X_p_shuffled, X_mhc_shuffled, log_binding_values_shuffled