from DeepTCR.DeepTCR import DeepTCR_SS, DeepTCR_U
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
from scipy.spatial.distance import pdist, squareform
from scipy.stats import spearmanr
import seaborn as sns
from NN_Assessment_utils import *
import pickle
import os
from scipy.stats import ttest_ind

# Unsupervised model named 'Sequence_C', placed on device '/gpu:1'.
DTCRU = DeepTCR_U('Sequence_C', device='/gpu:1')

# Load the murine antigen data; the column indices say where the beta-chain
# sequence, count, V gene and J gene live in each input file.
DTCRU.Get_Data(
    directory='../../Data/Murine_Antigens',
    Load_Prev_Data=False,
    aggregate_by_aa=True,
    aa_column_beta=0,
    count_column=1,
    v_beta_column=2,
    j_beta_column=3,
)

# VAE with use_only_gene=True, then condensed pairwise Euclidean distances
# between the resulting feature vectors.
DTCRU.Train_VAE(Load_Prev_Data=False, use_only_gene=True)
distances_vae_gene = pdist(DTCRU.features, metric='euclidean')

# VAE with use_only_seq=True; DTCRU.features is read again after retraining.
DTCRU.Train_VAE(Load_Prev_Data=False, use_only_seq=True)
distances_vae_seq = pdist(DTCRU.features, metric='euclidean')

# VAE with neither restriction flag set.
DTCRU.Train_VAE(Load_Prev_Data=False)
from DeepTCR.DeepTCR import DeepTCR_U
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from scipy.spatial.distance import pdist, squareform
from scipy.stats import spearmanr
import seaborn as sns
from NN_Assessment_utils import *
import pickle
import os
import matplotlib

# Use Arial for all figure text.
matplotlib.rc('font', family='Arial')

# Unsupervised training object for the murine data set.
DTCRU = DeepTCR_U('Murine_U')

# Load the murine antigen data; the column indices say where the beta-chain
# sequence, count, V gene and J gene live in each input file.
DTCRU.Get_Data(
    directory='../../Data/Murine_Antigens',
    Load_Prev_Data=False,
    aa_column_beta=0,
    count_column=1,
    v_beta_column=2,
    j_beta_column=3,
)

# --- Distances from the different featurizations ---

# VAE with use_only_gene=True, then condensed pairwise Euclidean distances
# between the resulting feature vectors.
DTCRU.Train_VAE(Load_Prev_Data=False, use_only_gene=True)
distances_vae_gene = pdist(DTCRU.features, metric='euclidean')

# VAE with use_only_seq=True.
DTCRU.Train_VAE(Load_Prev_Data=False, use_only_seq=True)
"""This script is used to characterize the performance of various featurization methods on TCRSeq data from 7 Human Antigens.""" from DeepTCR.DeepTCR import DeepTCR_U import numpy as np import matplotlib.pyplot as plt from scipy.spatial.distance import pdist, squareform import seaborn as sns from NN_Assessment_utils import * import pickle import os import pandas as pd #Instantiate training object DTCRU = DeepTCR_U('Human_U') #Load Data DTCRU.Get_Data(directory='../../Data/Human_Antigens',Load_Prev_Data=False,aggregate_by_aa=True, aa_column_beta=0,count_column=1,v_beta_column=2,j_beta_column=3) #Get distances from various methods #VAE_- Genes DTCRU.Train_VAE(Load_Prev_Data=False,use_only_gene=True) distances_vae_gene = pdist(DTCRU.features, metric='euclidean') # #VAE_- Sequencs Alone DTCRU.Train_VAE(Load_Prev_Data=False,use_only_seq=True) distances_vae_seq = pdist(DTCRU.features, metric='euclidean') #VAE_- Gene+Sequencs DTCRU.Train_VAE(Load_Prev_Data=False)
# DeepTCR_U is used below; import it explicitly (and first, so that anything
# the star import below provides under the same name still takes precedence,
# exactly as before) instead of relying on it leaking in from elsewhere.
from DeepTCR.DeepTCR import DeepTCR_U
import numpy as np
import seaborn as sns
from scipy.spatial.distance import pdist, squareform
from NN_Assessment_utils import *
import pickle
import os
import matplotlib.pyplot as plt
import pandas as pd
from scipy.stats import ttest_rel

# Output folder for this analysis. exist_ok=True creates it when missing and
# is a no-op when it already exists, without a separate (racy) existence check.
dir_results = 'Repertoire_Classification_Results'
os.makedirs(dir_results, exist_ok=True)

# Instantiate training object
DTCRU = DeepTCR_U('Repertoire_Classification')

# Load the Rudqvist data; the column indices say where the beta-chain
# sequence, count, and V/D/J genes live in each input file.
DTCRU.Get_Data(
    directory='../../Data/Rudqvist',
    Load_Prev_Data=False,
    aggregate_by_aa=True,
    aa_column_beta=1,
    count_column=2,
    v_beta_column=7,
    d_beta_column=14,
    j_beta_column=21,
)

# VAE-Gene: train with use_only_gene=True, expand the condensed pairwise
# distances (pdist default metric: Euclidean) into a square matrix, and pass
# it to phenograph_clustering_freq; only the first returned value is kept.
DTCRU.Train_VAE(use_only_gene=True)
d_vae_gene = squareform(pdist(DTCRU.features))
prop_vae_gene, _ = phenograph_clustering_freq(d_vae_gene, DTCRU)
from DeepTCR.DeepTCR import DeepTCR_U

# Unsupervised training object named 'Rep_Dendrogram' on device '/gpu:2'.
DTCRU = DeepTCR_U('Rep_Dendrogram', device='/gpu:2')

# Load the Rudqvist data from its directory; the column indices say where the
# beta-chain sequence, count, and V/D/J genes live in each input file.
DTCRU.Get_Data(
    directory='../../Data/Rudqvist',
    Load_Prev_Data=False,
    aggregate_by_aa=True,
    aa_column_beta=1,
    count_column=2,
    v_beta_column=7,
    d_beta_column=14,
    j_beta_column=21,
)

# Train the VAE with accuracy_min=0.9.
DTCRU.Train_VAE(accuracy_min=0.9)

# One plotting colour per label.
color_dict = {
    'Control': 'limegreen',
    '9H10': 'red',
    'RT': 'darkorange',
    'Combo': 'magenta',
}

# Draw the repertoire dendrogram using the 'KL' distance metric.
DTCRU.Repertoire_Dendrogram(
    n_jobs=40,
    distance_metric='KL',
    dendrogram_radius=0.28,
    repertoire_radius=0.35,
    Load_Prev_Data=True,
    gridsize=6,
    color_dict=color_dict,
)
# Finish the current figure: no y ticks, no axis labels, then display it.
plt.yticks([])
plt.xlabel('')
plt.ylabel('')
plt.show()

# Copy the inputs off the existing DTCR object.
beta_sequences = DTCR.beta_sequences
v_beta = DTCR.v_beta
j_beta = DTCR.j_beta
d_beta = DTCR.d_beta
hla = DTCR.hla_data_seq
sample_id = DTCR.sample_id

# 2-D UMAP embedding of the VAE features, cached on disk as a pickle.
# Flip `featurize` to True to retrain and overwrite the cache.
file = 'cm038_x2_u.pkl'
featurize = False
if not featurize:
    # NOTE: unpickling can execute arbitrary code; only load a cache file
    # that this script produced.
    with open(file, 'rb') as handle:
        X_2 = pickle.load(handle)
else:
    DTCR_U = DeepTCR_U('test_hum', device='/device:GPU:6')
    DTCR_U.Load_Data(
        beta_sequences=beta_sequences,
        v_beta=v_beta,
        d_beta=d_beta,
        j_beta=j_beta,
        hla=hla,
    )
    DTCR_U.Train_VAE(Load_Prev_Data=False, latent_dim=64, stop_criterion=0.01)
    X_2 = umap.UMAP().fit_transform(DTCR_U.features)
    with open(file, 'wb') as handle:
        pickle.dump(X_2, handle, protocol=4)

# First two embedding columns become the plot coordinates.
df_plot['x'] = X_2[:, 0]
df_plot['y'] = X_2[:, 1]
"""
This script compares a hamming distance (scipy.spatial.distance.pdist) to GLIPH's clustering
algorithm with different hamming distance thresholds to compare clustering accuracy as
previously demonstrated in the GLIPH manuscript. The GLIPH.csv file used in this analysis can
either be generated from the Run_Gliph.py script or viewed in the github repository.
"""
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from DeepTCR.DeepTCR import DeepTCR_U
from scipy.spatial.distance import pdist
from scipy.cluster.hierarchy import linkage, fcluster

# Instantiate training object
DTCRU = DeepTCR_U('Glanville_v_Hamming')
DTCRU.Get_Data(
    directory='../../Data/Glanville/',
    Load_Prev_Data=False,
    aa_column_beta=1,
    aggregate_by_aa=False,
)

method_dim = 'Hamming'

# Drop the length-1 axis 1 of the encoded beta sequences, then take condensed
# pairwise Hamming distances (fraction of positions that differ).
distances_hamming = pdist(np.squeeze(DTCRU.X_Seq_beta, 1), metric='hamming')

# Clustering thresholds: 50 log-spaced values from 0.001 to 10.
r = np.logspace(np.log10(0.001), np.log10(10), 50)

# Collect data for plots
x = []
y = []
total_seq = len(DTCRU.X_Seq_beta)

# The complete-linkage tree depends only on the distances, not on the
# threshold, so it is built once here rather than on every loop iteration.
Z = linkage(distances_hamming, method='complete')

for t in r:
    print(t)