Exemple #1
0
import os
from scipy.stats import ttest_ind

# Train a VAE on the murine antigen data, then score the learned features
# with Assess_Performance_KNN.
DTCRU = DeepTCR_U('Sequence_C', device=1)
DTCRU.Get_Data(directory='../../Data/Murine_Antigens',
               Load_Prev_Data=False,
               aggregate_by_aa=True,
               aa_column_beta=0,
               count_column=1,
               v_beta_column=2,
               j_beta_column=3)

# Both seeds are pinned and passed to Train_VAE so repeated runs use the
# same values.
graph_seed = 0
split_seed = 0
DTCRU.Train_VAE(Load_Prev_Data=False,
                graph_seed=graph_seed,
                split_seed=split_seed)

# Pairwise Euclidean distances between the feature rows produced by the VAE.
# NOTE(review): pdist is presumably scipy.spatial.distance.pdist (condensed
# output); its import is not visible in this snippet -- confirm.
distances_vae_seq_gene = pdist(DTCRU.features, metric='euclidean')

# Parallel lists: one distance array and one display name per method.
distances_list = [distances_vae_seq_gene]
names = ['VAE-Seq-VDJ']

# Create the output directory. exist_ok=True replaces the former
# "check, then create" guard, which could raise if the directory appeared
# between the check and the makedirs call.
dir_results = 'sup_v_unsup_results'
os.makedirs(dir_results, exist_ok=True)

df_metrics = Assess_Performance_KNN(distances_list,
                                    names,
                                    DTCRU.class_id,
                                    dir_results,
                                    metrics=['AUC'])
    os.makedirs(dir_results)

# Unsupervised DeepTCR object shared by every embedding computed below.
DTCRU = DeepTCR_U('Repertoire_Classification')

# Column indices forwarded to Get_Data for the Rudqvist files.
_rudqvist_columns = {
    'aa_column_beta': 1,
    'count_column': 2,
    'v_beta_column': 7,
    'd_beta_column': 14,
    'j_beta_column': 21,
}
DTCRU.Get_Data(directory='../../Data/Rudqvist',
               Load_Prev_Data=False,
               aggregate_by_aa=True,
               **_rudqvist_columns)


def _train_and_cluster(model, **train_kwargs):
    """Train the VAE, then derive distances and clustering output.

    Calls ``model.Train_VAE(**train_kwargs)``, builds the square pairwise
    distance matrix of ``model.features``, and hands that matrix plus the
    model to ``phenograph_clustering_freq``.

    Returns:
        A ``(distances, first_result)`` pair, where ``first_result`` is the
        first of the two values returned by ``phenograph_clustering_freq``
        (the second is discarded).
    """
    model.Train_VAE(**train_kwargs)
    distances = squareform(pdist(model.features))
    first_result, _ = phenograph_clustering_freq(distances, model)
    return distances, first_result


# The order matters: each call retrains the same object and reads its
# features straight afterwards.
# VAE-Gene
d_vae_gene, prop_vae_gene = _train_and_cluster(DTCRU, use_only_gene=True)

# VAE-Seq
d_vae_seq, prop_vae_seq = _train_and_cluster(DTCRU, use_only_seq=True)

# VAE-Seq-Gene
d_vae_seq_gene, prop_vae_seq_gene = _train_and_cluster(DTCRU,
                                                       Load_Prev_Data=False)

# Hamming
d_hamming = squareform(pdist(np.squeeze(DTCRU.X_Seq_beta, 1),
Exemple #3
0
# Unsupervised DeepTCR object for the repertoire dendrogram analysis.
DTCRU = DeepTCR_U('Rep_Dendrogram', device='/device:GPU:1')

# Column indices forwarded to Get_Data for the Rudqvist files.
_rudqvist_columns = {
    'aa_column_beta': 1,
    'count_column': 2,
    'v_beta_column': 7,
    'd_beta_column': 14,
    'j_beta_column': 21,
}
DTCRU.Get_Data(directory='../../Data/Rudqvist',
               Load_Prev_Data=True,
               aggregate_by_aa=True,
               **_rudqvist_columns)

# Train the VAE; both keyword values are passed through unchanged.
DTCRU.Train_VAE(Load_Prev_Data=True, accuracy_min=0.9)

# Create Repertoire Dendrogram
# One colour per label, in a fixed order; presumably handed to the
# Repertoire_Dendrogram call that follows -- confirm.
_labels = ('Control', '9H10', 'RT', 'Combo')
_colours = ('limegreen', 'red', 'darkorange', 'magenta')
color_dict = dict(zip(_labels, _colours))
DTCRU.Repertoire_Dendrogram(n_jobs=40,
                            distance_metric='KL',
                            log_scale=True,
                            dendrogram_radius=0.28,
                            repertoire_radius=0.35,
                            Load_Prev_Data=True,
                            gridsize=60,
from DeepTCR.DeepTCR import DeepTCR_U

# Unsupervised DeepTCR object for the repertoire dendrogram analysis.
DTCRU = DeepTCR_U('Rep_Dendrogram', device='/gpu:2')

# Read the Rudqvist files from their directory.
DTCRU.Get_Data(
    directory='../../Data/Rudqvist',
    Load_Prev_Data=False,
    aggregate_by_aa=True,
    aa_column_beta=1,
    count_column=2,
    v_beta_column=7,
    d_beta_column=14,
    j_beta_column=21,
)

# Train the VAE.
DTCRU.Train_VAE(accuracy_min=0.9)

# One colour per label, passed to Repertoire_Dendrogram below.
color_dict = {
    'Control': 'limegreen',
    '9H10': 'red',
    'RT': 'darkorange',
    'Combo': 'magenta',
}

# NOTE(review): the sibling snippet in this file calls Repertoire_Dendrogram
# with gridsize=60 and log_scale=True; confirm gridsize=6 is intended here.
_dendrogram_options = dict(
    n_jobs=40,
    distance_metric='KL',
    dendrogram_radius=0.28,
    repertoire_radius=0.35,
    Load_Prev_Data=True,
    gridsize=6,
    color_dict=color_dict,
)
DTCRU.Repertoire_Dendrogram(**_dendrogram_options)
# Copy the arrays needed below off the DTCR object (created earlier in the
# script, outside this snippet).
v_beta, j_beta, d_beta = DTCR.v_beta, DTCR.j_beta, DTCR.d_beta
hla = DTCR.hla_data_seq
sample_id = DTCR.sample_id

# X_2 is cached on disk; flip `featurize` to True to retrain and rebuild it.
file = 'cm038_x2_u.pkl'
featurize = False
if not featurize:
    # Reuse the cached embedding.
    # NOTE: pickle.load can execute arbitrary code -- only load a file this
    # script wrote itself.
    with open(file, 'rb') as cache_in:
        X_2 = pickle.load(cache_in)
else:
    # Train a fresh VAE, project its features with UMAP, and refresh the cache.
    DTCR_U = DeepTCR_U('test_hum', device='/device:GPU:6')
    DTCR_U.Load_Data(
        beta_sequences=beta_sequences,
        v_beta=v_beta,
        d_beta=d_beta,
        j_beta=j_beta,
        hla=hla,
    )
    DTCR_U.Train_VAE(Load_Prev_Data=False, latent_dim=64, stop_criterion=0.01)
    X_2 = umap.UMAP().fit_transform(DTCR_U.features)
    with open(file, 'wb') as cache_out:
        pickle.dump(X_2, cache_out, protocol=4)

# Store the first two embedding columns as the plot coordinates.
for _axis, _column in enumerate(('x', 'y')):
    df_plot[_column] = X_2[:, _axis]


def histogram_2d_cohort(d, w, grid_size):
    # center of data
    d_center = np.mean(np.concatenate(d, axis=0), axis=0)
    # largest radius
# Assess whether structural entropy can serve as a measure of the number of
# antigens.
classes_all = np.array([
    'Db-F2',
    'Kb-M38',
    'Db-M45',
    'Db-NP',
    'Db-PA',
    'Db-PB1',
    'Kb-m139',
    'Kb-SIY',
    'Kb-TRP2',
])

# Column indices forwarded to Get_Data for the murine antigen files.
_murine_columns = {
    'aa_column_beta': 0,
    'count_column': 1,
    'v_beta_column': 2,
    'j_beta_column': 3,
}
DTCRU.Get_Data(directory='../../Data/Murine_Antigens',
               Load_Prev_Data=False,
               aggregate_by_aa=True,
               **_murine_columns)


def _vae_distance_matrix(model, **train_kwargs):
    """Train the VAE and return the square distance matrix of its features.

    Calls ``model.Train_VAE(**train_kwargs)`` and then returns
    ``squareform(pdist(model.features))``.
    """
    model.Train_VAE(**train_kwargs)
    return squareform(pdist(model.features))


# The order matters: each call retrains the same object and reads its
# features straight afterwards.
# VAE-Gene
d_vae_gene = _vae_distance_matrix(DTCRU, use_only_gene=True)

# VAE-Seq
d_vae_seq = _vae_distance_matrix(DTCRU, use_only_seq=True)

# VAE-Seq-Gene
d_vae_seq_gene = _vae_distance_matrix(DTCRU)

# Hamming
# Drop axis 1 of X_Seq_beta before computing pairwise Hamming distances.
_seq_beta_squeezed = np.squeeze(DTCRU.X_Seq_beta, 1)
d_hamming = squareform(pdist(_seq_beta_squeezed, metric='hamming'))

# Kmer