예제 #1
0
def make_prediction(peptide, allele_sequence, model=None):
    """
    Score one peptide against one MHC allele sequence with ``model``.

    The allele sequence and the peptide are index-encoded with
    ``padded_indices`` (no start/end symbols, ``amino_acid_letter_indices``
    as the alphabet). ``format_peptide`` may yield more than one peptide
    row (per the original comments, when the peptide is longer than 9
    residues -- TODO confirm against ``format_peptide``); the encoded
    allele is repeated once per peptide row so both inputs line up.

    Args:
        peptide: peptide passed through ``format_peptide`` before encoding.
        allele_sequence: MHC allele sequence to encode.
        model: object exposing ``predict(dict)`` that returns a mapping
            with an ``'output'`` entry. Optional.

    Returns:
        The mean of the model's ``'output'`` predictions over all peptide
        rows, or ``0`` when ``model`` is falsy.
    """
    encoding_options = dict(add_start_symbol=False,
                            add_end_symbol=False,
                            index_dict=amino_acid_letter_indices)

    allele_row = padded_indices([allele_sequence], **encoding_options)

    # One encoded row per peptide produced by format_peptide.
    peptide_rows = padded_indices(format_peptide(peptide), **encoding_options)

    # Repeat the allele encoding so there is one copy per peptide row.
    allele_rows = np.tile(allele_row, (len(peptide_rows), 1))

    # Without a model there is nothing to predict; keep the 0 fallback.
    if not model:
        return 0

    outputs = model.predict({'peptide': peptide_rows, 'mhc': allele_rows})

    # Collapse the per-row predictions into a single averaged score.
    return np.mean(outputs['output'])
def get_model_data(allele_list,
                   allele_sequence_data,
                   allele_binding_data,
                   peptide_length=9,
                   mhc_length=None,
                   ):
    """
    Build shuffled training arrays for a list of alleles.

    For every allele the peptides are index-encoded, the allele's MHC
    sequence is index-encoded and repeated once per peptide, and the
    binding values are copied alongside. The three arrays are then
    shuffled with one shared random permutation so rows stay aligned.

    Args:
        allele_list: re-iterable collection of allele keys (it is
            traversed more than once).
        allele_sequence_data: mapping allele -> MHC sequence.
        allele_binding_data: mapping allele -> indexable where item ``0``
            holds the binding values (log-transformed IC50 according to
            the original documentation) and item ``1`` holds the peptides.
        peptide_length: width of the peptide array; the encoded peptides
            must fit this width or the NumPy assignment fails.
        mhc_length: width of the MHC array. When ``None`` it is inferred
            from the widest encoded allele sequence, since ``None`` cannot
            be used as an array dimension.

    Returns:
        Tuple ``(X_p, X_mhc, Y)`` of float arrays (``np.zeros`` default
        dtype) with shapes ``(n, peptide_length)``, ``(n, mhc_length)``
        and ``(n,)``, where ``n`` is the total number of peptides, all
        shuffled with the same row order.
    """
    total_rows = sum(len(allele_binding_data[allele][1])
                     for allele in allele_list)

    # Encode every allele's MHC sequence once, up front, so the width is
    # known before the output array is allocated.
    mhc_encodings = [
        padded_indices([allele_sequence_data[allele]],
                       add_start_symbol=False,
                       add_end_symbol=False,
                       index_dict=amino_acid_letter_indices)
        for allele in allele_list
    ]

    if mhc_length is None:
        # np.zeros((n, None)) raises TypeError, so derive the width from
        # the encodings themselves (0 when there are no alleles).
        widths = [np.shape(encoding)[-1] for encoding in mhc_encodings]
        mhc_length = max(widths) if widths else 0

    X_p = np.zeros((total_rows, peptide_length))
    X_mhc = np.zeros((total_rows, mhc_length))
    Y_combined = np.zeros(total_rows)

    start = 0
    for allele, mhc_seq in zip(allele_list, mhc_encodings):
        binding_values, peptides = (allele_binding_data[allele][0],
                                    allele_binding_data[allele][1])
        stop = start + len(peptides)

        # Index encoding for this allele's peptides.
        X_p[start:stop] = padded_indices(
            peptides,
            add_start_symbol=False,
            add_end_symbol=False,
            index_dict=amino_acid_letter_indices)

        # Same encoded MHC sequence for every peptide of this allele.
        X_mhc[start:stop] = np.tile(mhc_seq, (len(peptides), 1))

        # Binding values, in the same order as the peptides.
        Y_combined[start:stop] = binding_values

        start = stop

    # One shared permutation keeps peptide, MHC and target rows aligned.
    order = np.arange(len(X_p))
    np.random.shuffle(order)

    return X_p[order], X_mhc[order], Y_combined[order]