from DeepTCR.DeepTCR import DeepTCR_SS, DeepTCR_WF
from sklearn.metrics import roc_curve, roc_auc_score
import numpy as np
from matplotlib import pyplot as plt

# ---------------------------------------------------------------------------
# Train Sequence Classifier
# Reads the Rudqvist data directory from scratch (Load_Prev_Data=False) and
# runs a 5-fold cross-validation on the sequence classifier.
# ---------------------------------------------------------------------------
DTCR_SS = DeepTCR_SS('Rudqvist_SS', device='/device:GPU:0')
DTCR_SS.Get_Data(
    directory='../../Data/Rudqvist',
    Load_Prev_Data=False,
    aa_column_beta=1,
    count_column=2,
    v_beta_column=7,
    d_beta_column=14,
    j_beta_column=21,
)
DTCR_SS.K_Fold_CrossVal(folds=5)

# ---------------------------------------------------------------------------
# Train Repertoire Classifier
# Settings for the repertoire-level run; their consumers are past the end of
# this view.
# ---------------------------------------------------------------------------
folds = 100
LOO = 4
epochs_min = 10
size_of_net = 'small'
num_concepts = 64
hinge_loss_t = 0.1
train_loss_min = 0.1
seeds = np.arange(folds)  # one integer seed per fold: 0 .. folds - 1
graph_seed = 0

DTCR_WF = DeepTCR_WF('Rudqvist_WF', device='/device:GPU:0')
# NOTE(review): the call below is cut off in this view (argument list never
# closes). It is kept verbatim and left commented so the remaining arguments
# can be restored from the full script instead of being guessed here.
# DTCR_WF.Get_Data(directory='../../Data/Rudqvist', Load_Prev_Data=False,
import pandas as pd
from DeepTCR.DeepTCR import DeepTCR_SS
import numpy as np
from scipy.stats import gaussian_kde
import matplotlib.pyplot as plt
import matplotlib
matplotlib.rc('font', family='Arial')
import pickle

# Regression of a single antigen's counts on paired alpha/beta sequences.
df = pd.read_csv('../../../Data/10x_Data/Data_Regression.csv')
antigen = 'A0201_ELAGIGILTV_MART-1_Cancer'
DTCRS = DeepTCR_SS('reg_mart1', device=2)

# Get alpha/beta sequences
alpha = np.asarray(df['alpha'].tolist())
beta = np.asarray(df['beta'].tolist())

# Locate the antigen column by name (first match) and pull it out by position.
matching_columns = np.where(df.columns == antigen)[0]
antigen_col = matching_columns[0]
antigen_values = df.iloc[:, antigen_col]

# Regression target: log2(value + 1) of the selected column.
Y = np.log2(np.asarray(antigen_values.tolist()) + 1)

DTCRS.Load_Data(alpha_sequences=alpha, beta_sequences=beta, Y=Y)

# Cross-validation with one fixed seed per fold (0 .. folds - 1).
folds = 5
seeds = np.arange(folds)
graph_seed = 0
DTCRS.K_Fold_CrossVal(
    split_by_sample=False,
    folds=folds,
    seeds=seeds,
    graph_seed=graph_seed,
)

# Persist [antigen name, squeezed predictions, targets] as a protocol-4 pickle.
with open('mart1_preds.pkl', 'wb') as out_handle:
    payload = [antigen, np.squeeze(DTCRS.predicted), Y]
    pickle.dump(payload, out_handle, protocol=4)
# Accumulators: one entry per antigen column, stacked after the loop.
y_pred = []
y_test = []
antigen = []

# Worker pool handed to DTCRS.Load_Data on every iteration.
# NOTE(review): presumably multiprocessing.Pool (its import is outside this
# view) -- confirm; close()/join() below rely on that API.
p = Pool(40)
try:
    # Get alpha/beta sequences
    alpha = np.asarray(df['alpha'].tolist())
    beta = np.asarray(df['beta'].tolist())

    # Iterate through all antigens: every column from position 2 onward.
    for i in range(2, df.columns.shape[0]):
        sel = df.iloc[:, i]
        print(sel.name)

        # Regression target: log2(value + 1) of this column.
        Y = np.log2(np.asarray(sel.tolist()) + 1)

        DTCRS.Load_Data(alpha_sequences=alpha, beta_sequences=beta, Y=Y, p=p)
        DTCRS.K_Fold_CrossVal(split_by_sample=False, folds=5)

        y_pred.append(DTCRS.y_pred)
        y_test.append(DTCRS.y_test)
        # Repeat the antigen name once per prediction collected for it.
        antigen.append([sel.name] * len(DTCRS.y_pred))
finally:
    # The pool was previously never released; shut the workers down once the
    # last Load_Data call is done, including when an iteration raises.
    p.close()
    p.join()

antigen = np.hstack(antigen)
y_pred = np.vstack(y_pred)
y_test = np.vstack(y_test)

# Save Data
df_out = pd.DataFrame()
df_out['Antigen'] = antigen
df_out['Y_Pred'] = y_pred
df_out['Y_Test'] = y_test
df_out.to_csv('Regression_Results.csv', index=False)
# Paired alpha/beta sequences shared by every per-antigen run.
alpha = np.asarray(df['alpha'].tolist())
beta = np.asarray(df['beta'].tolist())

# Cross-validation settings: one fixed seed per fold (0 .. folds - 1).
folds = 5
seeds = np.arange(folds)
graph_seed = 0

# Per-antigen pieces, stacked into the final arrays after the loop.
pred_parts = []
test_parts = []
name_parts = []

# Iterate through all antigens: every column from position 2 onward.
for col_idx in range(2, len(df.columns)):
    sel = df.iloc[:, col_idx]
    print(sel.name)

    # Regression target: log2(value + 1) of this column.
    Y = np.log2(np.asarray(sel.tolist()) + 1)

    DTCRS.Load_Data(alpha_sequences=alpha, beta_sequences=beta, Y=Y, p=p)
    DTCRS.K_Fold_CrossVal(folds=folds, seeds=seeds, graph_seed=graph_seed)

    pred_parts.append(DTCRS.y_pred)
    test_parts.append(DTCRS.y_test)
    # Repeat the antigen name once per prediction collected for it.
    n_predictions = len(DTCRS.y_pred)
    name_parts.append([sel.name] * n_predictions)

antigen = np.hstack(name_parts)
y_pred = np.vstack(pred_parts)
y_test = np.vstack(test_parts)

# Save Data
# Columns are assigned one at a time, in this order, onto an empty frame.
df_out = pd.DataFrame()
for column_name, column_values in (
    ('Antigen', antigen),
    ('Y_Pred', y_pred),
    ('Y_Test', y_test),
):
    df_out[column_name] = column_values
df_out.to_csv('Regression_Results.csv', index=False)