예제 #1
0
파일: flu_rsl.py 프로젝트: tnakada/DeepTCR
import numpy as np
import pandas as pd
from DeepTCR.DeepTCR import DeepTCR_SS
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib
# Render all figure text in Arial.
matplotlib.rc('font', family='Arial')

# Model object registered under the name 'reg_flu'; `device=2` is forwarded
# unchanged to the DeepTCR_SS constructor.
DTCRS = DeepTCR_SS('reg_flu', device=2)

# The single alpha/beta CDR3 pair examined by this script.
alpha = 'CAGAGSQGNLIF'
beta = 'CASSSRSSYEQYF'

# The same pair is supplied twice, so both input arrays have length 2.
input_alpha = np.array([alpha] * 2)
input_beta = np.array([beta] * 2)

pred = DTCRS.Sequence_Inference(input_alpha, input_beta)

# Build the residue sensitivity logo on a black background without reusing
# previously stored data, then save it with a matching black canvas.
fig_rsl, ax_rsl = DTCRS.Residue_Sensitivity_Logo(
    input_alpha,
    input_beta,
    background_color='black',
    Load_Prev_Data=False,
)
fig_rsl.savefig('flu_rsl.png', dpi=1200, facecolor='black')

# Two side-by-side panels; only the left one (index 0) is filled in here.
fig, ax = plt.subplots(1, 2, figsize=(10, 5))
i = 0
sns.swarmplot(x='pos', y='high', data=DTCRS.df_alpha_list[i], ax=ax[i])

# Drop both axis titles and label every x position with the matching residue
# of the alpha sequence.
ax[i].set(xlabel='', ylabel='')
ax[i].set_xticklabels(list(alpha), size=24)
ax[i].tick_params(axis='y', labelsize=18)

# Hide the right and top borders of the panel.
for hidden_side in ('right', 'top'):
    ax[i].spines[hidden_side].set_visible(False)
예제 #2
0
                contains('|'.join(remove), regex=True)]
# Regex fragments that flag an unusable CDR3 entry. Raw strings keep the
# escaped metacharacters (\? and \*) byte-identical while avoiding the
# invalid-escape-sequence warning that plain string literals trigger on
# newer Python versions.
remove = ['0', r'\?', 'O', '9', r'\*', 'B', 'X']
remove_pattern = '|'.join(remove)

# Drop every row whose alpha or beta CDR3 matches any flagged fragment.
# The trailing .copy() makes the column assignments below operate on an
# independent frame instead of a filtered selection.
df_tcr = df_tcr[~df_tcr[cdr3_alpha_col].str.contains(remove_pattern,
                                                     regex=True)]
df_tcr = df_tcr[~df_tcr[cdr3_beta_col].str.contains(remove_pattern,
                                                    regex=True)].copy()

# Strip non-ASCII characters. regex=True is passed explicitly because
# pandas >= 2.0 treats the pattern as a literal string by default, which
# would silently turn these two lines into no-ops.
df_tcr[cdr3_alpha_col] = df_tcr[cdr3_alpha_col].str.replace(
    '[^\x00-\x7F]', '', regex=True)
df_tcr[cdr3_beta_col] = df_tcr[cdr3_beta_col].str.replace(
    '[^\x00-\x7F]', '', regex=True)

# Positive set: unique alpha/beta pairs annotated with the target epitope.
temp = df_tcr[df_tcr[epitope_col] == epitope]
temp = temp.groupby([cdr3_alpha_col, cdr3_beta_col]).agg({
    epitope_col: 'first'
}).reset_index()
# Use the configured alpha column name rather than a hard-coded one; after
# the groupby only the configured column names exist in `temp`.
temp = temp[~temp[cdr3_alpha_col].str.contains('#')]
# Exclude pairs whose 'alpha_beta' id already appears in the training table.
temp['seq_id'] = temp[cdr3_alpha_col] + '_' + temp[cdr3_beta_col]
temp = temp[~temp['seq_id'].isin(df_train_pep['seq_id'])]
out = DTCRS.Sequence_Inference(beta_sequences=np.array(temp[cdr3_beta_col]),
                               alpha_sequences=np.array(temp[cdr3_alpha_col]))
df_true = pd.DataFrame()
df_true['pred'] = np.squeeze(out)
df_true['label'] = 1.0

# Negative set: every remaining row annotated with a different epitope.
temp = df_tcr[df_tcr[epitope_col] != epitope]
out = DTCRS.Sequence_Inference(beta_sequences=np.array(temp[cdr3_beta_col]),
                               alpha_sequences=np.array(temp[cdr3_alpha_col]))
df_false = pd.DataFrame()
df_false['pred'] = np.squeeze(out)
df_false['label'] = 0.0

# Stack positives and negatives and write them out without the index.
df_preds = pd.concat([df_true, df_false])

df_preds.to_csv('mart1_mcpas_val.csv', index=False)
예제 #3
0
# Render all figure text in Arial.
matplotlib.rc('font', family='Arial')

# Training object registered under the name 'Murine_Sup'.
DTCRU = DeepTCR_SS('Murine_Sup')

# The data-loading and cross-validation calls for DTCRU are kept for
# reference but disabled, so the visible part of this script does not train.
# Note that they list the Db-* classes, whereas the data loaded further down
# uses the Kb-* classes.
# DTCRU.Get_Data(directory='../../Data/Murine_Antigens',Load_Prev_Data=False,
#                aa_column_beta=0,count_column=1,v_beta_column=2,j_beta_column=3,
#                classes=['Db-F2', 'Db-M45', 'Db-NP', 'Db-PA', 'Db-PB1'])
# DTCRU.Monte_Carlo_CrossVal(folds=5)

# Second object, used below only as the source of the parsed sequences and
# V/J gene calls for the Kb-* classes.
DTCR_inf = DeepTCR_SS('load')
murine_data_args = dict(
    directory='../../Data/Murine_Antigens',
    Load_Prev_Data=False,
    aa_column_beta=0,
    count_column=1,
    v_beta_column=2,
    j_beta_column=3,
    classes=['Kb-M38', 'Kb-SIY', 'Kb-TRP2', 'Kb-m139'],
)
DTCR_inf.Get_Data(**murine_data_args)

# Pull the loaded inputs off the loader object.
beta_sequences, v_beta, j_beta = (
    DTCR_inf.beta_sequences,
    DTCR_inf.v_beta,
    DTCR_inf.j_beta,
)

# Run the same inference twice with identical inputs, keeping both results.
inference_inputs = dict(beta_sequences=beta_sequences,
                        v_beta=v_beta,
                        j_beta=j_beta)
out = DTCRU.Sequence_Inference(**inference_inputs)
out2 = DTCRU.Sequence_Inference(**inference_inputs)