import os
import matplotlib.pyplot as plt
import pandas as pd
from scipy.stats import ttest_rel

# Output directory for this analysis. exist_ok=True makes the creation
# idempotent and avoids the check-then-create race of a separate
# os.path.exists() test (another process could create the directory between
# the check and the makedirs call).
dir_results = 'Repertoire_Classification_Results'
os.makedirs(dir_results, exist_ok=True)

# Instantiate training object
DTCRU = DeepTCR_U('Repertoire_Classification')

# Column arguments handed to Get_Data for the Rudqvist files, grouped so the
# file layout can be read in one place.
# NOTE(review): presumably these are column positions within each input
# file -- confirm against the DeepTCR Get_Data documentation.
_rudqvist_columns = {
    'aa_column_beta': 1,
    'count_column': 2,
    'v_beta_column': 7,
    'd_beta_column': 14,
    'j_beta_column': 21,
}

# Read the Rudqvist data directory into the training object.
DTCRU.Get_Data(
    directory='../../Data/Rudqvist',
    Load_Prev_Data=False,
    aggregate_by_aa=True,
    **_rudqvist_columns,
)

def _vae_cluster_proportions(**train_kwargs):
    """Train the VAE with *train_kwargs* and cluster the resulting features.

    Returns a tuple ``(distances, proportions)`` where ``distances`` is the
    square pairwise-distance matrix of ``DTCRU.features`` (pdist's default
    metric) and ``proportions`` is the first of the two values returned by
    ``phenograph_clustering_freq``.
    """
    DTCRU.Train_VAE(**train_kwargs)
    distances = squareform(pdist(DTCRU.features))
    proportions, _ = phenograph_clustering_freq(distances, DTCRU)
    return distances, proportions


# VAE-Gene
d_vae_gene, prop_vae_gene = _vae_cluster_proportions(use_only_gene=True)

# VAE-Seq
d_vae_seq, prop_vae_seq = _vae_cluster_proportions(use_only_seq=True)

# VAE-Seq-Gene
# Example #2
# 0
import numpy as np
import matplotlib.pyplot as plt
from scipy.spatial.distance import pdist, squareform
from scipy.stats import spearmanr
import seaborn as sns
from NN_Assessment_utils import *
import pickle
import os
from scipy.stats import ttest_ind

# Seeds passed to Train_VAE below.
# NOTE(review): presumably fixed so the run is reproducible -- confirm.
graph_seed = split_seed = 0

#Run VAE
DTCRU = DeepTCR_U('Sequence_C', device=1)
DTCRU.Get_Data(
    directory='../../Data/Murine_Antigens',
    Load_Prev_Data=False,
    aggregate_by_aa=True,
    aa_column_beta=0,
    count_column=1,
    v_beta_column=2,
    j_beta_column=3,
)
DTCRU.Train_VAE(
    Load_Prev_Data=False,
    graph_seed=graph_seed,
    split_seed=split_seed,
)

# Condensed (1-D) Euclidean distances between every pair of feature rows.
distances_vae_seq_gene = pdist(DTCRU.features, metric='euclidean')

# Parallel lists: names[i] is the label for distances_list[i].
names = ['VAE-Seq-VDJ']
distances_list = [distances_vae_seq_gene]

dir_results = 'sup_v_unsup_results'
if not os.path.exists(dir_results):
# Example #3
# 0
in the GLIPH manuscript. The GLIPH.csv file used in this analysis can either be generated from the Run_Gliph.py
script or viewed in the GitHub repository.
"""

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from DeepTCR.DeepTCR import DeepTCR_U
from scipy.spatial.distance import pdist
from scipy.cluster.hierarchy import linkage, fcluster

#Instantiate training object
DTCRU = DeepTCR_U('Glanville_v_Hamming')
DTCRU.Get_Data(
    directory='../../Data/Glanville/',
    Load_Prev_Data=False,
    aggregate_by_aa=False,
    aa_column_beta=1,
)

# Label for the distance method used in this script.
method_dim = 'Hamming'

# Drop axis 1 of X_Seq_beta (np.squeeze requires it to have length 1), then
# compute condensed pairwise Hamming distances between the remaining rows.
# NOTE(review): assumes X_Seq_beta holds one encoded sequence per entry with
# a singleton second axis -- confirm against DeepTCR.
distances_hamming = pdist(
    np.squeeze(DTCRU.X_Seq_beta, axis=1),
    metric='hamming',
)

#Clustering Thresholds
# 50 values spaced evenly on a log10 scale from 0.001 up to 10 (inclusive).
r = np.logspace(
    start=np.log10(0.001),
    stop=np.log10(10),
    num=50,
)

#Collect data for plots
# x and y are accumulators for the threshold loop that follows.
x, y = [], []
# Number of entries in X_Seq_beta.
total_seq = len(DTCRU.X_Seq_beta)
for t in r:
    print(t)
    Z = linkage(distances_hamming, method='ward')