from DeepTCR.DeepTCR import DeepTCR_SS, DeepTCR_U
import numpy as np
import seaborn as sns

#Run Supervised Sequence Classifier
# Create a DeepTCR_SS object named 'Sequence_C', load the murine-antigen data
# from disk (Load_Prev_Data=False), run a 10-fold Monte Carlo cross-validation
# and then request the representative sequences.
DTCRS = DeepTCR_SS('Sequence_C')
DTCRS.Get_Data(directory='../../Data/Murine_Antigens', Load_Prev_Data=False, aggregate_by_aa=True,
               aa_column_beta=0, count_column=1, v_beta_column=2, j_beta_column=3)
DTCRS.Monte_Carlo_CrossVal(folds=10, stop_criterion=0.01)
DTCRS.Representative_Sequences(top_seq=10, unique=True)

from Bio import SeqIO
from Bio.SeqRecord import SeqRecord
from Bio.Seq import Seq
from Bio.Alphabet import IUPAC

# Leave the loop on its first iteration so that `item` stays bound to the
# first key yielded by DTCRS.Rep_Seq; only that single entry is used below.
for item in DTCRS.Rep_Seq:
    break

# presumably DTCRS.Rep_Seq[item] is a pandas DataFrame with a 'beta' column and
# a column named after `item` -- TODO confirm against DeepTCR.
t = DTCRS.Rep_Seq[item]
# One row per distinct 'beta' value, keeping the first `item` value of each group.
t = t.groupby(['beta']).agg({item: 'first'})
# Largest `item` value first.
t = t.sort_values(by=item, ascending=False)
# Turn the 'beta' group key back into an ordinary column.
t.reset_index(inplace=True)

# Keep at most the first ten 'beta' entries after sorting.
seq = t['beta'].tolist()
seq = seq[:10]

out = []
for s in seq:
    # NOTE(review): the body of this loop lies beyond the visible source.
#Run Supervised Sequence Classifier DTCRS = DeepTCR_SS('Sequence_C', device=6) DTCRS.Get_Data(directory='../../Data/Murine_Antigens', Load_Prev_Data=True, aggregate_by_aa=True, aa_column_beta=0, count_column=1, v_beta_column=2, j_beta_column=3) folds = 100 seeds = np.array(range(folds)) graph_seed = 0 DTCRS.Monte_Carlo_CrossVal(folds=folds, graph_seed=graph_seed, seeds=seeds) DTCRS.Representative_Sequences(top_seq=25, motif_seq=10, color_scheme='hydrophobicity') dir = 'Murine_Rep_Sequences' if os.path.exists(dir): shutil.rmtree(dir) os.makedirs(dir) for item in DTCRS.Rep_Seq: t = DTCRS.Rep_Seq[item] t = t.groupby(['beta']).agg({item: 'first'}) t = t.sort_values(by=item, ascending=False) t.reset_index(inplace=True) seq = t['beta'].tolist() seq = seq[:10] out = []
from DeepTCR.DeepTCR import DeepTCR_SS
from multiprocessing import Pool
import matplotlib.pyplot as plt
from sklearn.metrics import roc_auc_score, roc_curve
import os

# Worker pool passed to every Get_Data call below through its `p` argument.
# NOTE(review): the pool is never closed or joined in the visible code. It is
# left open here because code beyond this view may still use `p`; once its last
# use is known, release it with p.close() followed by p.join().
p = Pool(80)

# Directory for results. exist_ok=True replaces the former
# exists()-then-makedirs() pair, which could raise FileExistsError if the
# directory was created between the check and the call.
dir_results = 'alpha_v_beta_results'
os.makedirs(dir_results, exist_ok=True)

# Each name is both a sub-directory of ../../Data/Zhang/ and, with the suffix
# 'Rep', the name given to that antigen's DeepTCR_SS object.
antigens = [
    'GANAB-S5F',
    'ATP6AP1-KLG_G3W',
    'CMV-MLN',
    'GNL3L-R4C',
    'MART1-A2L',
    'YFV-LLW',
]

# For every antigen: load its data (alpha chain from column 0, beta chain from
# column 1), run a 50-fold Monte Carlo cross-validation with
# weight_by_class=True, then request the representative sequences.
for a in antigens:
    DTCR = DeepTCR_SS(a + 'Rep')
    DTCR.Get_Data(directory='../../Data/Zhang/' + a,
                  aa_column_alpha=0,
                  aa_column_beta=1,
                  p=p)
    DTCR.Monte_Carlo_CrossVal(folds=50, weight_by_class=True)
    DTCR.Representative_Sequences()
# NOTE(review): this fragment starts mid-file. `X_2`, `sel_idx`, `GKDE`, `xlim`,
# `ylim`, `l`, `dir_write` and `DTCR` are all defined outside the visible
# source; the plotting part is presumably the body of a per-label loop --
# confirm its indentation against the full file.

# Scatter plot of the selected rows of X_2 (column 0 against column 1).
fig, ax = plt.subplots(figsize=(5, 5))
x = X_2[sel_idx, 0]
y = X_2[sel_idx, 1]
# GKDE returns five values; only the first three (x, y and the colour values c)
# are used, the last two are discarded.
x, y, c, _, _ = GKDE(x, y)
ax.scatter(x, y, c=c, cmap='jet', s=100)
# Apply the externally supplied axis limits.
ax.set_xlim(xlim)
ax.set_ylim(ylim)
# Title the panel with `l` and hide all tick marks.
ax.set_title(l, fontsize=36)
ax.set_xticks([])
ax.set_yticks([])
plt.tight_layout()
# Save the figure as <dir_write>/<l>.png and close it.
fig.savefig(os.path.join(dir_write, l + '.png'), dpi=1200)
plt.close()

#Get Residue Sensitivity Logo for select epitopes
# Request the representative sequences without generating sequence logos.
DTCR.Representative_Sequences(top_seq=100, make_seq_logos=False)
test_peptide = 'TSTLQEQIGW'
# First ten 'beta' entries stored for the chosen peptide.
rep_seq = DTCR.Rep_Seq[test_peptide]['beta'][0:10]

# Draw five distinct integers from 0..99 and turn them into 'model_<n>' names ...
models = np.random.choice(range(100), 5, replace=False)
models = ['model_' + str(x) for x in models]
# NOTE(review): ... but the selection is discarded right away, so models=None is
# what Residue_Sensitivity_Logo receives. Confirm that this override is intended.
models = None

DTCR.Residue_Sensitivity_Logo(beta_sequences=np.array(rep_seq), models=models,
                              class_sel=test_peptide, Load_Prev_Data=False,
                              background_color='black', edgewidth=0.0,
                              figsize=(3, 4), min_size=0.25, norm_to_seq=True)
# Save the current figure as '<test_peptide>.png' in the working directory.
plt.savefig(test_peptide + '.png', dpi=1200)