import os
import matplotlib.pyplot as plt
import pandas as pd
from scipy.stats import ttest_rel

# NOTE(review): DeepTCR_U, pdist, squareform and phenograph_clustering_freq are
# not imported in the visible part of this script -- presumably they are
# brought into scope earlier in the file; confirm.

# Create the results directory if it does not already exist.
# exist_ok=True avoids the race-prone "check, then create" pattern.
dir_results = 'Repertoire_Classification_Results'
os.makedirs(dir_results, exist_ok=True)

# Instantiate training object
DTCRU = DeepTCR_U('Repertoire_Classification')
DTCRU.Get_Data(
    directory='../../Data/Rudqvist',
    Load_Prev_Data=False,
    aggregate_by_aa=True,
    aa_column_beta=1,
    count_column=2,
    v_beta_column=7,
    d_beta_column=14,
    j_beta_column=21,
)

# Each run below retrains the VAE with a different input setting, builds the
# square pairwise-distance matrix over the resulting DTCRU.features, and
# passes it to phenograph_clustering_freq (first return value is kept).

# VAE-Gene
DTCRU.Train_VAE(use_only_gene=True)
d_vae_gene = squareform(pdist(DTCRU.features))
prop_vae_gene, _ = phenograph_clustering_freq(d_vae_gene, DTCRU)

# VAE-Seq
DTCRU.Train_VAE(use_only_seq=True)
d_vae_seq = squareform(pdist(DTCRU.features))
prop_vae_seq, _ = phenograph_clustering_freq(d_vae_seq, DTCRU)

# VAE-Seq-Gene
import numpy as np import matplotlib.pyplot as plt from scipy.spatial.distance import pdist, squareform from scipy.stats import spearmanr import seaborn as sns from NN_Assessment_utils import * import pickle import os from scipy.stats import ttest_ind #Run VAE DTCRU = DeepTCR_U('Sequence_C', device=1) DTCRU.Get_Data(directory='../../Data/Murine_Antigens', Load_Prev_Data=False, aggregate_by_aa=True, aa_column_beta=0, count_column=1, v_beta_column=2, j_beta_column=3) graph_seed = 0 split_seed = 0 DTCRU.Train_VAE(Load_Prev_Data=False, graph_seed=graph_seed, split_seed=split_seed) distances_vae_seq_gene = pdist(DTCRU.features, metric='euclidean') distances_list = [distances_vae_seq_gene] names = ['VAE-Seq-VDJ'] dir_results = 'sup_v_unsup_results' if not os.path.exists(dir_results):
in the GLIPH manuscript. The GLIPH.csv file used in this analysis can either be generated from the Run_Gliph.py script or viewed in the github repository. """ import numpy as np import matplotlib.pyplot as plt import pandas as pd import seaborn as sns from DeepTCR.DeepTCR import DeepTCR_U from scipy.spatial.distance import pdist from scipy.cluster.hierarchy import linkage, fcluster #Instantiate training object DTCRU = DeepTCR_U('Glanville_v_Hamming') DTCRU.Get_Data(directory='../../Data/Glanville/', Load_Prev_Data=False, aa_column_beta=1, aggregate_by_aa=False) method_dim = 'Hamming' distances_hamming = pdist(np.squeeze(DTCRU.X_Seq_beta, 1), metric='hamming') #Clustering Thresholds r = np.logspace(np.log10(0.001), np.log10(10), 50) #Collect data for plots x = [] y = [] total_seq = len(DTCRU.X_Seq_beta) for t in r: print(t) Z = linkage(distances_hamming, method='ward')