"""Figure 2B

Create the ROC curves used to assess the ability of the supervised sequence
classifier to correctly predict the antigen-specificity of the 9 murine
antigens in the manuscript.
"""
from DeepTCR.DeepTCR import DeepTCR_SS

# Keyword arguments forwarded to Get_Data: the data directory plus the
# column indices for the beta sequence, count, V-beta and J-beta fields.
data_options = dict(
    directory='../../Data/Murine_Antigens',
    Load_Prev_Data=False,
    aggregate_by_aa=True,
    aa_column_beta=0,
    count_column=1,
    v_beta_column=2,
    j_beta_column=3,
)

# Run Supervised Sequence Classifier
DTCRS = DeepTCR_SS('Sequence_C')
DTCRS.Get_Data(**data_options)

# Monte Carlo cross-validation over 10 folds, then draw the AUC curves.
DTCRS.Monte_Carlo_CrossVal(folds=10)
DTCRS.AUC_Curve()
DTCR.Get_Data(directory='../../Data/Zhang/' + a, aa_column_alpha=0, p=p) elif o == 'beta': DTCR = DeepTCR_SS('alpha_v_beta_SS') DTCR.Get_Data(directory='../../Data/Zhang/' + a, aa_column_beta=1, p=p) elif o == 'alpha_beta': DTCR = DeepTCR_SS('alpha_v_beta_SS') DTCR.Get_Data(directory='../../Data/Zhang/' + a, aa_column_alpha=0, aa_column_beta=1, p=p) DTCR.Monte_Carlo_CrossVal(folds=50, weight_by_class=True) y_pred_list.append(DTCR.y_pred) y_test_list.append(DTCR.y_test) plt.figure() plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--') plt.xlim([0.0, 1.0]) plt.ylim([0.0, 1.05]) plt.xlabel('False Positive Rate', fontsize=14) plt.ylabel('True Positive Rate', fontsize=14) for ii, o in enumerate(opt, 0): y_test = y_test_list[ii] y_pred = y_pred_list[ii] roc_score = roc_auc_score(y_test[:, 1], y_pred[:, 1]) fpr, tpr, _ = roc_curve(y_test[:, 1], y_pred[:, 1]) plt.plot(fpr, tpr, lw=2, label='%s (area = %0.4f)' % (o, roc_score))
of supervised sequence classifier to correctly predict the antigen-specificity of the 9 murine antigens in the manuscript..""" from DeepTCR.DeepTCR import DeepTCR_SS import numpy as np import matplotlib.pyplot as plt import matplotlib matplotlib.rc('font', family='Arial') #Run Supervised Sequence Classifier DTCRS = DeepTCR_SS('Sequence_C', device=2) DTCRS.Get_Data(directory='../../../Data/Murine_Antigens', Load_Prev_Data=False, aggregate_by_aa=True, aa_column_beta=0, count_column=1, v_beta_column=2, j_beta_column=3) folds = 10 seeds = np.array(range(folds)) graph_seed = 0 DTCRS.Monte_Carlo_CrossVal(folds=folds, seeds=seeds, graph_seed=graph_seed) DTCRS.AUC_Curve(xlabel_size=24, ylabel_size=24, xtick_size=18, ytick_size=18, legend_font_size=14, frameon=False, diag_line=False)
from DeepTCR.DeepTCR import DeepTCR_SS, DeepTCR_U
import numpy as np
import seaborn as sns

# The Biopython imports are placed before the training run so that a missing
# or incompatible Biopython install fails immediately instead of after the
# cross-validation has finished.
from Bio import SeqIO
from Bio.SeqRecord import SeqRecord
from Bio.Seq import Seq
# NOTE(review): Bio.Alphabet was removed in Biopython 1.78, so this import
# only succeeds on older releases. It is kept because code further down the
# script may still reference IUPAC -- confirm and drop if unused.
from Bio.Alphabet import IUPAC

# Run Supervised Sequence Classifier
DTCRS = DeepTCR_SS('Sequence_C')
DTCRS.Get_Data(
    directory='../../Data/Murine_Antigens',
    Load_Prev_Data=False,
    aggregate_by_aa=True,
    aa_column_beta=0,
    count_column=1,
    v_beta_column=2,
    j_beta_column=3,
)
DTCRS.Monte_Carlo_CrossVal(folds=10, stop_criterion=0.01)
DTCRS.Representative_Sequences(top_seq=10, unique=True)

# Take the first key of Rep_Seq (presumably a mapping from class name to a
# pandas DataFrame -- confirm against the DeepTCR documentation).
item = next(iter(DTCRS.Rep_Seq))

# Collapse rows sharing a 'beta' value to their first entry in the `item`
# column, then order the rows by that column, largest first.
t = DTCRS.Rep_Seq[item]
t = t.groupby(['beta']).agg({item: 'first'})
t = t.sort_values(by=item, ascending=False)
t.reset_index(inplace=True)

# Keep at most the ten leading beta sequences.
seq = t['beta'].tolist()[:10]
out = []
from DeepTCR.DeepTCR import DeepTCR_SS, DeepTCR_WF

# Both classifiers below load the same directory with the same column
# indices, so the Get_Data keyword arguments are defined once.
rudqvist_data = dict(
    directory='../../Data/Rudqvist',
    Load_Prev_Data=False,
    aggregate_by_aa=True,
    aa_column_beta=1,
    count_column=2,
    v_beta_column=7,
    d_beta_column=14,
    j_beta_column=21,
)

#Train Sequence Classifier
DTCR_SS = DeepTCR_SS('Rudqvist')
DTCR_SS.Get_Data(**rudqvist_data)
DTCR_SS.Monte_Carlo_CrossVal(folds=100, test_size=0.25)
DTCR_SS.AUC_Curve()

#Train Repertoire Classifier without on-graph clustering
DTCR_WF = DeepTCR_WF('Rudqvist')
DTCR_WF.Get_Data(**rudqvist_data)
DTCR_WF.Monte_Carlo_CrossVal(folds=100, LOO=4, epochs_min=50)
DTCR_WF.AUC_Curve()

#Train Repertoire Classifier with on-graph clustering
count_train = []

# For every group, keep only the entries whose predicted value exceeds
# `thresh`, and collect the matching sequences, labels and counts.
for s, seq_cl, p, c in zip(sequences, seq_class_labels, predicted, counts):
    sel_idx = p > thresh
    seq_train.append(s[sel_idx])
    label_train.append(seq_cl[sel_idx])
    count_train.append(c[sel_idx])

# Flatten each list of per-group arrays into one array.
seq_train, label_train, count_train = (
    np.hstack(parts) for parts in (seq_train, label_train, count_train)
)

#Train Sequence Classifier
# NOTE(review): count_train is assembled above but is not passed to Load_Data
# in the visible code -- confirm that this is intended.
DTCR = DeepTCR_SS('tw10_seq', device=gpu)
DTCR.Load_Data(beta_sequences=seq_train, class_labels=label_train)
DTCR.Monte_Carlo_CrossVal(
    folds=folds,
    graph_seed=graph_seed,
    seeds=seeds,
    convergence='training',
)
y_pred = DTCR.predicted
y_test = DTCR.Y

# One ROC curve per class, labelled with the class name and its AUC.
plt.figure(figsize=(6, 5))
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
for ii, cl in enumerate(DTCR.lb.classes_):
    truth, score = y_test[:, ii], y_pred[:, ii]
    roc_score = roc_auc_score(truth, score)
    fpr, tpr, _ = roc_curve(truth, score)
    label = '%s = %0.3f' % (cl, roc_score)
    plt.plot(fpr, tpr, lw=2, label=label)
plt.legend(loc='lower right', frameon=False, prop={'size': 10})
ax = plt.gca()
seeds = np.arange(folds)

# Get_Data column arguments for each chain option.
chain_columns = {
    'alpha': dict(aa_column_alpha=0),
    'beta': dict(aa_column_beta=1),
    'alpha_beta': dict(aa_column_alpha=0, aa_column_beta=1),
}

for a in antigens:
    y_pred_list = []
    y_test_list = []

    # Train one classifier per chain option and keep its predictions.
    for o in opt:
        # Options missing from the table do not rebuild DTCR.
        if o in chain_columns:
            DTCR = DeepTCR_SS('alpha_v_beta_SS')
            DTCR.Get_Data(
                directory='../../Data/Zhang/' + a,
                p=p,
                **chain_columns[o]
            )
        DTCR.Monte_Carlo_CrossVal(
            folds=folds,
            weight_by_class=True,
            graph_seed=graph_seed,
            seeds=seeds,
        )
        y_pred_list.append(DTCR.y_pred)
        y_test_list.append(DTCR.y_test)

    # One figure per antigen: dashed diagonal plus one ROC curve per option.
    plt.figure()
    plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate', fontsize=24)
    plt.ylabel('True Positive Rate', fontsize=24)
    for ii, o in enumerate(opt):
        y_test, y_pred = y_test_list[ii], y_pred_list[ii]
        # Column 1 of both arrays is used for the score and the curve.
        truth, score = y_test[:, 1], y_pred[:, 1]
        roc_score = roc_auc_score(truth, score)
        fpr, tpr, _ = roc_curve(truth, score)
        curve_label = '%s (area = %0.4f)' % (o, roc_score)
        plt.plot(fpr, tpr, lw=2, label=curve_label)