Example 1
from mhcflurry import Class1PresentationPredictor


def prediction_whole_seq(protein_seqs,
                         alleles,
                         result_format="filtered",
                         comparison_quantity="affinity",
                         filter_value=500):
    """prediction_whole_seq 

	wraps the class1presentationpredictor from mhcflurry 2.0 for customizability
	
	Args:
	    protein_seqs (Dict): 	dictionary that maps a name like "protein_x": aa sequence
	    alleles (Dict): 		dictionary that maps a name like "sample_1": allele name
	    result_format (str, optional): 			Description
	    comparison_quantity (str, optional): 	Description
	    filter_value (int, optional): 			Description
	"""
    predictor = Class1PresentationPredictor.load()

    prediction_df = predictor.predict_sequences(
        sequences=protein_seqs,
        alleles=alleles,
        result=result_format,
        comparison_quantity=comparison_quantity,
        filter_value=filter_value,
        verbose=1)

    return prediction_df
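
A minimal usage sketch for the wrapper above; the protein sequence and alleles below are illustrative placeholders, not values from the original code.

example_proteins = {
    "protein_x": "MESLVPGFNEKTHVQLSLPVLQVRDVLVRGFGDSVEE",  # made-up sequence
}
example_alleles = {
    "sample_1": ["HLA-A*02:01", "HLA-B*07:02"],  # placeholder alleles
}

df = prediction_whole_seq(example_proteins,
                          example_alleles,
                          result_format="filtered",
                          comparison_quantity="affinity",
                          filter_value=500)
print(df.head())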
Example 2
def obtain_allele_list(preselected=None):

    if preselected is None:
        predictor = Class1PresentationPredictor.load()
        # snippet for obtaining supported alleles directly
        return predictor.supported_alleles

    else:
        # preselected is a path to a text file with one allele name per line
        with open(preselected, 'r') as f:
            return [line.rstrip() for line in f]
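
A quick call sketch covering both branches; "my_alleles.txt" is a hypothetical file with one allele name per line.

# full list supported by MHCflurry
supported = obtain_allele_list()
print(len(supported))

# restricted list read from a file (hypothetical path)
subset = obtain_allele_list(preselected="my_alleles.txt")
print(subset[:5])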
Example 3
def file_process(is_checked,
                 upload="./uploaded/multiple_query.txt",
                 download="./app/download/result.txt"):
    table_scaled = wrapper_read_scaling()  # [21,553]
    after_pca = pca_apply_reduction(table_scaled)  # [21,12]
    hla = pd.read_csv('hla2paratopeTable_aligned.txt', sep='\t')
    hla_dic = hla_df_to_dic(hla)
    inventory = list(hla_dic.keys())
    dic_inventory = dict_inventory(inventory)

    cnn_model = seperateCNN()
    cnn_model.load_weights('cnn_model_331_3_7/')

    ori_score = pd.read_csv(upload, sep=',', header=None)
    ori_score.columns = ['peptide', 'HLA']
    ori_score['immunogenicity'] = ['0'] * ori_score.shape[0]
    print('************************ 1 *************************')  # may need to remove
    dataset_score, hla_type = construct_aaindex(ori_score, hla_dic, after_pca,
                                                dic_inventory)

    input1_score = pull_peptide_aaindex(dataset_score)
    input2_score = pull_hla_aaindex(dataset_score)
    label_score = pull_label_aaindex(dataset_score)
    scoring = cnn_model.predict(x=[input1_score, input2_score])
    ori_score['immunogenicity'] = scoring

    if is_checked == 'True':  # see the compute_m function for why we set up like this
        m = ori_score['HLA'].values.tolist()  # a list of HLA
        p = ori_score['peptide'].values.tolist()  # a list of peptides
        tmp_dic_for_alleles = {}
        for index, mhc_ in enumerate(m):
            tmp_dic_for_alleles['sample{}'.format(index)] = [mhc_]
        predictor = Class1PresentationPredictor.load()
        result = predictor.predict(peptides=p,
                                   alleles=tmp_dic_for_alleles,
                                   verbose=0)
        # predict() scores every peptide against every sample (a cross product);
        # keep only the row pairing each peptide with its own allele, indexed by
        # the numeric suffix so more than ten samples are handled correctly
        final = {}
        for sample, chunk in result.groupby(by='sample_name'):
            index = int(sample.replace('sample', ''))
            final[index] = chunk.iloc[index, :]['presentation_score']
        ori_score['binding(mhcflurry)'] = [final[i] for i in range(len(final))]

    ori_score.to_csv(download, sep='\t', index=False)
Example 4
def score_with_mhcflurry(peptides: np.ndarray, alleles: np.ndarray):
    predictor = Class1PresentationPredictor.load()
    predictor_scores = predictor.predict(peptides=peptides,
                                         alleles=alleles,
                                         verbose=0)
    return predictor_scores
Example 5
def binding_score_from_mhcflurry_s(peptide, mhc):
    predictor = Class1PresentationPredictor.load()
    result = predictor.predict(peptides=[peptide], alleles=[mhc], verbose=0)
    binding = result.iloc[0]['presentation_score']
    return float(binding)
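
A single-call sketch; the peptide and allele are placeholders.

score = binding_score_from_mhcflurry_s("SIINFEKL", "HLA-A*02:01")
print(score)  # presentation score between 0 and 1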
Example 6
def computing_m(peptide, mhc, is_checked):  # multiple MHC query
    table_scaled = wrapper_read_scaling()  # [21,553]
    after_pca = pca_apply_reduction(table_scaled)  # [21,12]
    hla = pd.read_csv('hla2paratopeTable_aligned.txt', sep='\t')
    hla_dic = hla_df_to_dic(hla)
    inventory = list(hla_dic.keys())
    dic_inventory = dict_inventory(inventory)

    cnn_model = seperateCNN()
    cnn_model.load_weights('cnn_model_331_3_7/')

    hla_score = [
        'HLA-A*0101', 'HLA-A*0201', 'HLA-A*0202', 'HLA-A*0301', 'HLA-A*1101',
        'HLA-A*2402', 'HLA-A*6802', 'HLA-B*0702', 'HLA-B*0801', 'HLA-B*3501',
        'HLA-B*4402'
    ]

    peptide_score = [peptide] * len(hla_score)
    immuno_score = ['0'] * len(hla_score)
    ori_score = pd.DataFrame({
        'peptide': peptide_score,
        'HLA': hla_score,
        'immunogenicity': immuno_score
    })
    dataset_score, hla_type = construct_aaindex(ori_score, hla_dic, after_pca,
                                                dic_inventory)
    input1_score = pull_peptide_aaindex(dataset_score)
    input2_score = pull_hla_aaindex(dataset_score)
    label_score = pull_label_aaindex(dataset_score)
    scoring = cnn_model.predict(x=[input1_score, input2_score])
    ori_score['immunogenicity'] = scoring
    ori_score.sort_values(by=['immunogenicity'], ascending=False, inplace=True)
    top5 = ori_score.iloc[0:5]

    p = top5['peptide'].tolist()
    m = top5['HLA'].tolist()
    i = [item for item in top5['immunogenicity']]

    # for these 5 complexes, compute binding affinity
    if is_checked == 'True':
        '''
        strange input requirement: when you have 5 peptides and 5 MHCs, you have to construct the call like this:
        a = predictor.predict(
            peptides=["NLVPMVATV","AAAAAAAAA","TTTTTTTT","PPPPPPPP","QQQQQQQQ"],
            alleles={'sample0': ['HLA-C*0517'], 'sample1': ['HLA-C*0602'], 'sample2': ['HLA-C*0401'], 'sample3': ['HLA-B*4403'], 'sample4': ['HLA-B*5101']},
            verbose=0)

        then, since they compute a cross product, I need to pick the values I need from the returned result
        '''

        tmp_dic_for_alleles = {}
        for index, mhc_ in enumerate(m):
            tmp_dic_for_alleles['sample{}'.format(index)] = [mhc_]
        predictor = Class1PresentationPredictor.load()
        result = predictor.predict(peptides=p,
                                   alleles=tmp_dic_for_alleles,
                                   verbose=0)
        final = []
        for sample, chunk in result.groupby(by='sample_name'):
            index = int(sample[-1:])
            final.append(chunk.iloc[index, :]['presentation_score'])
    else:
        final = ['NA', 'NA', 'NA', 'NA', 'NA']
    return p, m, i, final
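
The cross-product behaviour described in the docstring can be reproduced in isolation with a short sketch like the one below; the peptides and alleles are placeholders.

from mhcflurry import Class1PresentationPredictor

predictor = Class1PresentationPredictor.load()

peptides = ["NLVPMVATV", "SIINFEKL"]
alleles = {"sample0": ["HLA-A*02:01"], "sample1": ["HLA-B*07:02"]}

# every peptide is scored against every sample, so this returns 4 rows
result = predictor.predict(peptides=peptides, alleles=alleles, verbose=0)

# keep only the "diagonal": peptide i paired with sample i
diagonal = [
    chunk.iloc[int(name.replace("sample", ""))]["presentation_score"]
    for name, chunk in result.groupby("sample_name")
]
print(diagonal)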
Example 7
from mhcflurry import Class1PresentationPredictor

predictor = Class1PresentationPredictor.load()


print(len(predictor.supported_alleles))

for item in predictor.supported_alleles:
    print(item)


# alleles = predictor.supported_alleles

# df = predictor.predict(
#     peptides=["SIINFEKL", "NLVPMVATV"],
#     alleles=["HLA-A0201", "HLA-A0301"],
#     verbose=0
# )
#
# print(df)
#
#
# def test_affinity():
#     from mhcflurry import Class1AffinityPredictor
#     affinity_predictor = Class1AffinityPredictor.load()
#     df = affinity_predictor.predict_to_dataframe(alleles="HLA-A0201", peptides=["SIINFEKL", "SIINFEQL"])
#     print(df)
Example 8
def MHCflurry(df, alleles_list):
    predictor = Class1PresentationPredictor.load()
    #create dicts of index and nmer
    mut_dict = {}
    wt_dict = {}

    for i, r in df.loc[((df['Mut nmer'] != '-') &
                        (df['Mut nmer'].str.len() >= 8))].iterrows():
        mut_dict[i] = r['Mut nmer']
    for i, r in df.loc[((df['Wt nmer'] != '-') &
                        (df['Wt nmer'].str.len() >= 8))].iterrows():
        wt_dict[i] = r['Wt nmer']

    ##for whatever reason the result='all' option isn't outputting everything;
    ##this works fine if I loop through each HLA instead, and since MHCflurry is
    ##quick this shouldn't be much of a problem
    MHCFlurryMut = pd.DataFrame()
    MHCFlurryWt = pd.DataFrame()

    for a in alleles_list:
        mut_pred = predictor.predict_sequences(
            sequences=mut_dict,
            result='all',
            alleles=[a],
            peptide_lengths=(8, 9, 10, 11, 12),
            use_flanks=True,
            verbose=0)
        wt_pred = predictor.predict_sequences(
            sequences=wt_dict,
            result='all',
            alleles=[a],
            peptide_lengths=(8, 9, 10, 11, 12),
            use_flanks=True,
            verbose=0)
        # build up the result frames with concat (DataFrame.append was removed in pandas 2.0)
        MHCFlurryMut = pd.concat([MHCFlurryMut, mut_pred], ignore_index=True)
        MHCFlurryWt = pd.concat([MHCFlurryWt, wt_pred], ignore_index=True)

    ##add lengths in
    MHCFlurryMut['peptide length'] = MHCFlurryMut['peptide'].str.len()
    MHCFlurryWt['peptide length'] = MHCFlurryWt['peptide'].str.len()

    ##rename some columns
    MHCFlurryMut.rename(columns={'peptide': 'Mutant peptide', 'affinity': 'MHCflurry mutant affinity',\
                                'best_allele':'HLA', 'affinity_percentile':'MHCflurry affinity percentile rank mutant',\
                                'processing_score':'MHCflurry processing score mutant', 'presentation_score':'MHCflurry presentation score mutant'},\
                    inplace = True)
    MHCFlurryWt.rename(columns={'peptide': 'Wild type peptide', 'affinity': 'MHCflurry wild type affinity',\
                                'best_allele':'HLA', 'affinity_percentile':'MHCflurry affinity percentile rank wild type',\
                                'processing_score':'MHCflurry processing score wild type', 'presentation_score':'MHCflurry presentation score wild type'},\
                    inplace = True)

    ##merge mutant and wild type, then drop rows where the mutant peptide equals the wild type peptide
    merged = MHCFlurryMut.merge(MHCFlurryWt, how = 'left', left_on = ['sequence_name', 'pos', 'HLA', 'peptide length'],\
                    right_on = ['sequence_name', 'pos', 'HLA', 'peptide length'] )

    merged = merged.loc[~(merged['Mutant peptide']==merged['Wild type peptide'])][['sequence_name', 'HLA', 'peptide length','Mutant peptide','Wild type peptide',\
                                                                                    'MHCflurry mutant affinity','MHCflurry wild type affinity','MHCflurry affinity percentile rank mutant',\
                                                                                    'MHCflurry affinity percentile rank wild type','MHCflurry processing score mutant', 'MHCflurry processing score wild type',\
                                                                                    'MHCflurry presentation score mutant','MHCflurry presentation score wild type',]]

    ##merge with original dataframe
    out = df.merge(merged,
                   how='left',
                   left_index=True,
                   right_on='sequence_name').drop(columns='sequence_name')
    out['MHCFlurry Wt:Mut rank'] = out.apply(lambda x: WT2MutpercentileRank(
        x['MHCflurry affinity percentile rank wild type'], x[
            'MHCflurry affinity percentile rank mutant']),
                                             axis=1)

    ##return out
    return out
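
A hedged usage sketch for the function above; the one-row DataFrame, sequences, and allele are illustrative only, and WT2MutpercentileRank must already be defined in the calling module.

import pandas as pd

toy = pd.DataFrame({
    'Mut nmer': ['KLDEVNAGLSTLV'],  # made-up mutant nmer
    'Wt nmer': ['KLDEVNAGLSALV'],   # made-up wild-type nmer
})

hits = MHCflurry(toy, alleles_list=['HLA-A*02:01'])
print(hits[['Mutant peptide', 'Wild type peptide',
            'MHCflurry mutant affinity']].head())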
Example 9
def obtain_allele_list():
    predictor = Class1PresentationPredictor.load()
    # snippet for obtaining supported alleles directly
    return predictor.supported_alleles