コード例 #1
0
ファイル: Sup_Seq_ROC.py プロジェクト: BlingArida/DeepTCR
"""Figure 2B"""
"""This script is used to create the ROC curves for assessing the ability
of supervised sequence classifier to correctly predict the antigen-specificity of 
the 9 murine antigens in the manuscript."""

from DeepTCR.DeepTCR import DeepTCR_SS

# Where the murine antigen data lives and which column index holds which field,
# exactly as handed to Get_Data.
murine_data_options = {
    'directory': '../../Data/Murine_Antigens',
    'Load_Prev_Data': False,
    'aggregate_by_aa': True,
    'aa_column_beta': 0,
    'count_column': 1,
    'v_beta_column': 2,
    'j_beta_column': 3,
}

# Run the supervised sequence classifier: load the data, cross-validate over
# 10 Monte Carlo folds, then draw the AUC curve.
DTCRS = DeepTCR_SS('Sequence_C')
DTCRS.Get_Data(**murine_data_options)
DTCRS.Monte_Carlo_CrossVal(folds=10)
DTCRS.AUC_Curve()
コード例 #2
0
            # Alpha-only option: only aa_column_alpha (column 0) is passed for the
            # data under '../../Data/Zhang/' + a.
            DTCR.Get_Data(directory='../../Data/Zhang/' + a,
                          aa_column_alpha=0,
                          p=p)
        elif o == 'beta':
            # Beta-only option: only aa_column_beta (column 1) is passed.
            DTCR = DeepTCR_SS('alpha_v_beta_SS')
            DTCR.Get_Data(directory='../../Data/Zhang/' + a,
                          aa_column_beta=1,
                          p=p)
        elif o == 'alpha_beta':
            # Paired option: both the alpha (0) and beta (1) columns are passed.
            DTCR = DeepTCR_SS('alpha_v_beta_SS')
            DTCR.Get_Data(directory='../../Data/Zhang/' + a,
                          aa_column_alpha=0,
                          aa_column_beta=1,
                          p=p)

        # NOTE(review): there is no else branch, so an option other than the ones
        # tested above would reach this point with DTCR left over from the
        # previous iteration — confirm `opt` only holds the handled values.
        DTCR.Monte_Carlo_CrossVal(folds=50, weight_by_class=True)
        # Keep the predictions and labels of this option for the ROC plot below.
        y_pred_list.append(DTCR.y_pred)
        y_test_list.append(DTCR.y_test)

    # One figure per outer-loop iteration, with a dashed diagonal from (0, 0)
    # to (1, 1) drawn as a reference line.
    plt.figure()
    plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate', fontsize=14)
    plt.ylabel('True Positive Rate', fontsize=14)
    # Draw one ROC curve per option, labelled with the option name and its AUC.
    for ii, o in enumerate(opt, 0):
        y_test = y_test_list[ii]
        y_pred = y_pred_list[ii]
        # Column 1 of the label/prediction arrays is scored; presumably this is
        # the positive class of a two-class problem — TODO confirm.
        roc_score = roc_auc_score(y_test[:, 1], y_pred[:, 1])
        fpr, tpr, _ = roc_curve(y_test[:, 1], y_pred[:, 1])
        plt.plot(fpr, tpr, lw=2, label='%s (area = %0.4f)' % (o, roc_score))
コード例 #3
0
of supervised sequence classifier to correctly predict the antigen-specificity of 
the 9 murine antigens in the manuscript."""

from DeepTCR.DeepTCR import DeepTCR_SS
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
matplotlib.rc('font', family='Arial')

# Run the supervised sequence classifier (constructed with device=2).
DTCRS = DeepTCR_SS('Sequence_C', device=2)

# Data location and column indices, exactly as handed to Get_Data.
murine_data_options = dict(
    directory='../../../Data/Murine_Antigens',
    Load_Prev_Data=False,
    aggregate_by_aa=True,
    aa_column_beta=0,
    count_column=1,
    v_beta_column=2,
    j_beta_column=3,
)
DTCRS.Get_Data(**murine_data_options)

# One seed per fold (0 .. folds-1) plus a fixed graph seed; presumably this
# makes the cross-validation run repeatable — confirm against the library docs.
folds = 10
graph_seed = 0
seeds = np.arange(folds)
DTCRS.Monte_Carlo_CrossVal(folds=folds, seeds=seeds, graph_seed=graph_seed)

# Font sizes and decoration switches for the AUC figure.
auc_plot_options = dict(
    xlabel_size=24,
    ylabel_size=24,
    xtick_size=18,
    ytick_size=18,
    legend_font_size=14,
    frameon=False,
    diag_line=False,
)
DTCRS.AUC_Curve(**auc_plot_options)
コード例 #4
0
ファイル: Sup_Motifs.py プロジェクト: BlingArida/DeepTCR
from DeepTCR.DeepTCR import DeepTCR_SS, DeepTCR_U
import numpy as np
import seaborn as sns

# Data location and column indices, exactly as handed to Get_Data.
murine_data_options = {
    'directory': '../../Data/Murine_Antigens',
    'Load_Prev_Data': False,
    'aggregate_by_aa': True,
    'aa_column_beta': 0,
    'count_column': 1,
    'v_beta_column': 2,
    'j_beta_column': 3,
}

# Run the supervised sequence classifier, then request the representative
# sequences (top_seq=10, unique=True) that the motif analysis below reads
# from DTCRS.Rep_Seq.
DTCRS = DeepTCR_SS('Sequence_C')
DTCRS.Get_Data(**murine_data_options)
DTCRS.Monte_Carlo_CrossVal(folds=10, stop_criterion=0.01)
DTCRS.Representative_Sequences(top_seq=10, unique=True)
from Bio import SeqIO
from Bio.SeqRecord import SeqRecord
from Bio.Seq import Seq
from Bio.Alphabet import IUPAC

# Walk over the entries of DTCRS.Rep_Seq and collect the leading 'beta'
# sequences for each one.
# NOTE(review): the unconditional `break` is the first statement of the loop
# body, so the loop exits immediately and none of the statements after it are
# ever executed. Confirm whether this section is intentionally disabled.
for item in DTCRS.Rep_Seq:
    break
    # Everything below is unreachable while the `break` above is in place.
    # presumably a pandas DataFrame with a 'beta' column and a column named
    # after `item` — TODO confirm
    t = DTCRS.Rep_Seq[item]
    # Collapse to one row per distinct 'beta' value, keeping the first value of
    # the `item` column, then order by that column from highest to lowest.
    t = t.groupby(['beta']).agg({item: 'first'})
    t = t.sort_values(by=item, ascending=False)
    t.reset_index(inplace=True)
    # Keep at most the first 10 'beta' entries after sorting.
    seq = t['beta'].tolist()
    seq = seq[:10]
    out = []
コード例 #5
0
from DeepTCR.DeepTCR import DeepTCR_SS, DeepTCR_WF

# Both classifiers below load the same Rudqvist files with the same column
# indices, so the Get_Data options are written once and unpacked twice.
rudqvist_data_options = dict(
    directory='../../Data/Rudqvist',
    Load_Prev_Data=False,
    aggregate_by_aa=True,
    aa_column_beta=1,
    count_column=2,
    v_beta_column=7,
    d_beta_column=14,
    j_beta_column=21,
)

# Train the sequence classifier.
DTCR_SS = DeepTCR_SS('Rudqvist')
DTCR_SS.Get_Data(**rudqvist_data_options)
DTCR_SS.Monte_Carlo_CrossVal(folds=100, test_size=0.25)
DTCR_SS.AUC_Curve()

# Train the repertoire classifier without on-graph clustering.
DTCR_WF = DeepTCR_WF('Rudqvist')
DTCR_WF.Get_Data(**rudqvist_data_options)
DTCR_WF.Monte_Carlo_CrossVal(folds=100, LOO=4, epochs_min=50)
DTCR_WF.AUC_Curve()

#Train Repertoire Classifier with on-graph clustering
コード例 #6
0
count_train = []
# Walk the four parallel collections in lockstep and, for each item, keep only
# the entries whose predicted value is strictly greater than `thresh`.
# assumes s, seq_cl, p and c are equal-length NumPy arrays so that the boolean
# mask can index all of them — TODO confirm
for s, seq_cl, p, c in zip(sequences, seq_class_labels, predicted, counts):
    sel_idx = p > thresh
    seq_train.append(s[sel_idx])
    label_train.append(seq_cl[sel_idx])
    count_train.append(c[sel_idx])

# Join the per-item selections into one array each with np.hstack.
seq_train = np.hstack(seq_train)
label_train = np.hstack(label_train)
count_train = np.hstack(count_train)

# Train the sequence classifier on the selected sequences and their labels.
# NOTE(review): count_train is assembled earlier in this script but is not
# passed to Load_Data in the visible code — confirm whether counts are meant
# to be supplied here.
DTCR = DeepTCR_SS('tw10_seq', device=gpu)
DTCR.Load_Data(beta_sequences=seq_train, class_labels=label_train)
DTCR.Monte_Carlo_CrossVal(folds=folds,
                          graph_seed=graph_seed,
                          seeds=seeds,
                          convergence='training')
# Predictions and labels are read from the classifier's `predicted` and `Y`
# attributes; both are indexed by class column below.
y_pred = DTCR.predicted
y_test = DTCR.Y
plt.figure(figsize=(6, 5))
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
# One ROC curve per class: column ii of y_test / y_pred is paired with the
# ii-th entry of DTCR.lb.classes_, and the legend shows the class name with
# its AUC to three decimals.
for ii, cl in enumerate(DTCR.lb.classes_, 0):
    fpr, tpr, _ = roc_curve(y_test[:, ii], y_pred[:, ii])
    roc_score = roc_auc_score(y_test[:, ii], y_pred[:, ii])
    label = '%s = %0.3f' % (cl, roc_score)
    plt.plot(fpr, tpr, lw=2, label=label)
plt.legend(loc='lower right', frameon=False, prop={'size': 10})
ax = plt.gca()
コード例 #7
0
ファイル: alpha_v_beta.py プロジェクト: tnakada/DeepTCR
# One seed per cross-validation fold: 0 .. folds-1.
seeds = np.array(range(folds))

# Get_Data column arguments for each chain option: the alpha-only run passes
# aa_column_alpha=0, the beta-only run passes aa_column_beta=1, and the paired
# run passes both.
chain_columns = {
    'alpha': {'aa_column_alpha': 0},
    'beta': {'aa_column_beta': 1},
    'alpha_beta': {'aa_column_alpha': 0, 'aa_column_beta': 1},
}

for a in antigens:
    # Cross-validated predictions and labels, one entry per option in `opt`.
    y_pred_list = []
    y_test_list = []
    for o in opt:
        # Reject unknown options explicitly so that a typo in `opt` cannot
        # silently reuse the classifier trained for the previous option and
        # plot its results under the wrong label.
        if o not in chain_columns:
            raise ValueError('Unknown chain option: %r' % (o,))

        DTCR = DeepTCR_SS('alpha_v_beta_SS')
        DTCR.Get_Data(directory='../../Data/Zhang/' + a, p=p, **chain_columns[o])

        DTCR.Monte_Carlo_CrossVal(folds=folds,
                                  weight_by_class=True,
                                  graph_seed=graph_seed,
                                  seeds=seeds)
        y_pred_list.append(DTCR.y_pred)
        y_test_list.append(DTCR.y_test)

    # One figure per antigen, with a dashed diagonal from (0, 0) to (1, 1)
    # drawn as a reference line.
    plt.figure()
    plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate', fontsize=24)
    plt.ylabel('True Positive Rate', fontsize=24)
    # One ROC curve per option, labelled with the option name and its AUC.
    for ii, o in enumerate(opt, 0):
        y_test = y_test_list[ii]
        y_pred = y_pred_list[ii]
        # Column 1 of the label/prediction arrays is scored; presumably this is
        # the positive class of a two-class problem — TODO confirm.
        roc_score = roc_auc_score(y_test[:, 1], y_pred[:, 1])
        fpr, tpr, _ = roc_curve(y_test[:, 1], y_pred[:, 1])
        plt.plot(fpr, tpr, lw=2, label='%s (area = %0.4f)' % (o, roc_score))